Open-Meteo maintains an API for historical weather that allows for non-commercial usage of historical weather data maintained by the website.
This file runs exploratory analysis on some of the historical weather data.
The exploration process uses tidyverse and several generic custom functions:
library(tidyverse) # tidyverse functionality is included throughout
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.1.8
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
source("./Generic_Added_Utility_Functions_202105_v001.R") # Basic functions
A sample of data for 365 days has been downloaded as a CSV. The downloaded data has three separate files included in a single tab, separated by a blank row. The first file is location data, the second file is hourly data, and the third file is daily data. For initial exploration, parameters specific to this file are used:
omFileLoc <- "./RInputFiles/openmeteo_20230612_example.csv"
# Location data
omLocation <- readr::read_csv(omFileLoc, n_max=1, skip=0)
## Rows: 1 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): timezone, timezone_abbreviation
## dbl (4): latitude, longitude, elevation, utc_offset_seconds
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
omLocation
## # A tibble: 1 × 6
## latitude longitude elevation utc_offset_seconds timezone timezone_abb…¹
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 41.8 -87.6 179 -18000 America/Chicago CDT
## # … with abbreviated variable name ¹timezone_abbreviation
# Hourly data
# Elements: time, 2m temp (C), 2m dew point (C), 2m relative humidity (%), precip (mm), rain (mm), and snow (cm)
omHourlyRaw <- readr::read_csv(omFileLoc, n_max=8760, skip=3)
## Rows: 8760 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): temperature_2m (°C), relativehumidity_2m (%), dewpoint_2m (°C), pr...
## dttm (1): time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
omHourlyProcess <- omHourlyRaw %>%
purrr::set_names(c("time", "temp2m_C", "relH2m", "dew2m_C", "precip_mm", "rain_mm", "snow_cm")) %>%
mutate(date=date(time))
omHourlyProcess
## # A tibble: 8,760 × 8
## time temp2…¹ relH2m dew2m_C preci…² rain_mm snow_cm date
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <date>
## 1 2022-06-08 00:00:00 13.4 91 11.9 0 0 0 2022-06-08
## 2 2022-06-08 01:00:00 13.4 91 12 0 0 0 2022-06-08
## 3 2022-06-08 02:00:00 13.8 87 11.8 0 0 0 2022-06-08
## 4 2022-06-08 03:00:00 13.8 87 11.7 0 0 0 2022-06-08
## 5 2022-06-08 04:00:00 14 85 11.6 0 0 0 2022-06-08
## 6 2022-06-08 05:00:00 14.4 82 11.3 0 0 0 2022-06-08
## 7 2022-06-08 06:00:00 14.8 79 11.1 0 0 0 2022-06-08
## 8 2022-06-08 07:00:00 15.1 77 11.1 0.1 0.1 0 2022-06-08
## 9 2022-06-08 08:00:00 15.7 75 11.2 0 0 0 2022-06-08
## 10 2022-06-08 09:00:00 16.4 72 11.3 0 0 0 2022-06-08
## # … with 8,750 more rows, and abbreviated variable names ¹temp2m_C, ²precip_mm
summary(omHourlyProcess)
## time temp2m_C relH2m
## Min. :2022-06-08 00:00:00 Min. :-20.60 Min. : 32.00
## 1st Qu.:2022-09-07 05:45:00 1st Qu.: 2.80 1st Qu.: 62.00
## Median :2022-12-07 11:30:00 Median : 10.30 Median : 72.00
## Mean :2022-12-07 11:30:00 Mean : 10.81 Mean : 72.38
## 3rd Qu.:2023-03-08 17:15:00 3rd Qu.: 19.80 3rd Qu.: 83.00
## Max. :2023-06-07 23:00:00 Max. : 31.50 Max. :100.00
## NA's :53 NA's :53
## dew2m_C precip_mm rain_mm snow_cm
## Min. :-24.300 Min. : 0.00000 Min. : 0.00000 Min. :0.00000
## 1st Qu.: -1.400 1st Qu.: 0.00000 1st Qu.: 0.00000 1st Qu.:0.00000
## Median : 5.500 Median : 0.00000 Median : 0.00000 Median :0.00000
## Mean : 5.792 Mean : 0.09986 Mean : 0.09167 Mean :0.00573
## 3rd Qu.: 14.700 3rd Qu.: 0.00000 3rd Qu.: 0.00000 3rd Qu.:0.00000
## Max. : 24.200 Max. :11.10000 Max. :11.10000 Max. :1.26000
## NA's :53 NA's :53 NA's :53 NA's :53
## date
## Min. :2022-06-08
## 1st Qu.:2022-09-07
## Median :2022-12-07
## Mean :2022-12-07
## 3rd Qu.:2023-03-08
## Max. :2023-06-07
##
# Daily data
# Elements: date, sum of precip (mm), sum of rain (mm), and sum of snow (cm)
omDailyRaw <- readr::read_csv(omFileLoc, n_max=365, skip=8765)
## Rows: 365 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): precipitation_sum (mm), rain_sum (mm), snowfall_sum (cm)
## date (1): time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
omDailyProcess <- omDailyRaw %>%
purrr::set_names(c("date", "precip_mm", "rain_mm", "snow_cm"))
omDailyProcess
## # A tibble: 365 × 4
## date precip_mm rain_mm snow_cm
## <date> <dbl> <dbl> <dbl>
## 1 2022-06-08 16 16 0
## 2 2022-06-09 0 0 0
## 3 2022-06-10 0.6 0.6 0
## 4 2022-06-11 0 0 0
## 5 2022-06-12 1.3 1.3 0
## 6 2022-06-13 2.6 2.6 0
## 7 2022-06-14 0 0 0
## 8 2022-06-15 0 0 0
## 9 2022-06-16 9.5 9.5 0
## 10 2022-06-17 0 0 0
## # … with 355 more rows
summary(omDailyProcess)
## date precip_mm rain_mm snow_cm
## Min. :2022-06-08 Min. : 0.000 Min. : 0.000 Min. :0.0000
## 1st Qu.:2022-09-07 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.:0.0000
## Median :2022-12-07 Median : 0.000 Median : 0.000 Median :0.0000
## Mean :2022-12-07 Mean : 2.402 Mean : 2.205 Mean :0.1379
## 3rd Qu.:2023-03-08 3rd Qu.: 1.875 3rd Qu.: 1.300 3rd Qu.:0.0000
## Max. :2023-06-07 Max. :40.000 Max. :40.000 Max. :6.6500
## NA's :3 NA's :3 NA's :3
A function is written to read a portion of a CSV file:
partialCSVRead <- function(loc, firstRow=1L, lastRow=+Inf, col_names=TRUE, ...) {
# FUNCTION arguments
# loc: file location
# firstRow: first row that is relevant to the partial file read (whether header line or data line)
# last Row: last row that is relevant to the partial file read (+Inf means read until last line of file)
# col_names: the col_names parameter passed to readr::read_csv
# TRUE means header=TRUE (get column names from file, read data starting on next line)
# FALSE means header=FALSE (auto-generate column names, read data starting on first line)
# character vector means use these as column names (read data starting on first line)
# ...: additional arguments passed to read_csv
# Read the file and return
# skip: rows to be skipped are all those prior to firstRow
# n_max: maximum rows read are lastRow-firstRow, with an additional data row when col_names is not TRUE
readr::read_csv(loc,
skip=firstRow-1,
n_max=lastRow-firstRow+ifelse(isTRUE(col_names), 0, 1),
...
)
}
# Double check that data are the same
partialCSVRead(omFileLoc, firstRow=1L, lastRow=2L) %>% all.equal(omLocation)
## Rows: 1 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): timezone, timezone_abbreviation
## dbl (4): latitude, longitude, elevation, utc_offset_seconds
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] TRUE
partialCSVRead(omFileLoc, firstRow=4L, lastRow=8764L) %>% all.equal(omHourlyRaw)
## Rows: 8760 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): temperature_2m (°C), relativehumidity_2m (%), dewpoint_2m (°C), pr...
## dttm (1): time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] TRUE
partialCSVRead(omFileLoc, firstRow=8766L, lastRow=+Inf) %>% all.equal(omDailyRaw)
## Rows: 365 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): precipitation_sum (mm), rain_sum (mm), snowfall_sum (cm)
## date (1): time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] TRUE
The blank lines are assessed, allowing for all tables to be read at the same time:
# Get the break points for gaps in a vector (e.g., 0, 3, 5:8, 20 has break points 0, 3, 5, 20 and 0, 3, 8, 30)
vecGaps <- function(x, addElements=c(), sortUnique=TRUE) {
if(length(addElements)>0) x <- c(addElements, x)
if(isTRUE(sortUnique)) x <- unique(sort(x))
list("starts"=c(x[is.na(lag(x)) | x-lag(x)>1], +Inf),
"ends"=x[is.na(lead(x)) | lead(x)-x>1]
)
}
vecGaps(c(3, 5:8, 20), addElements=0)
## $starts
## [1] 0 3 5 20 Inf
##
## $ends
## [1] 0 3 8 20
# Find the break points in a single file
flatFileGaps <- function(loc) {
which(stringr::str_length(readLines(loc))==0) %>% vecGaps(addElements=0)
}
flatFileGaps(omFileLoc)
## $starts
## [1] 0 3 8765 Inf
##
## $ends
## [1] 0 3 8765
# Read all relevant data as CSV with header
readMultiCSV <- function(loc, col_names=TRUE, ...) {
gaps <- flatFileGaps(loc)
lapply(seq_along(gaps$ends),
FUN=function(x) partialCSVRead(loc,
firstRow=gaps$ends[x]+1,
lastRow=gaps$starts[x+1]-1,
col_names=col_names,
...
)
)
}
tstMultiCSV <- readMultiCSV(omFileLoc)
## Rows: 1 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): timezone, timezone_abbreviation
## dbl (4): latitude, longitude, elevation, utc_offset_seconds
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 8760 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): temperature_2m (°C), relativehumidity_2m (%), dewpoint_2m (°C), pr...
## dttm (1): time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 365 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): precipitation_sum (mm), rain_sum (mm), snowfall_sum (cm)
## date (1): time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all.equal(tstMultiCSV[[1]], omLocation)
## [1] TRUE
all.equal(tstMultiCSV[[2]], omHourlyRaw)
## [1] TRUE
all.equal(tstMultiCSV[[3]], omDailyRaw)
## [1] TRUE
Data can also be downloaded through the Open-Meteo API, returning a JSON file. The data download has been completed off-line to minimize repeated hits against the server. The JSON file can then be read:
# Example download sequence
# download.file("https://archive-api.open-meteo.com/v1/archive?latitude=41.85&longitude=-87.65&start_date=2022-06-01&end_date=2023-06-08&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,precipitation,rain,snowfall&daily=precipitation_sum,rain_sum,snowfall_sum&timezone=America%2FChicago", "tempOM")
# Create hourly data tibble
jsonHourly <- jsonlite::read_json("tempOM", simplifyVector = TRUE)[["hourly"]] %>%
tibble::as_tibble() %>%
mutate(tm=lubridate::ymd_hm(time), date=date(tm))
jsonHourly
## # A tibble: 8,952 × 9
## time tempe…¹ relat…² dewpo…³ preci…⁴ rain snowf…⁵ tm
## <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dttm>
## 1 2022-06-01… 21 92 19.6 0.1 0.1 0 2022-06-01 00:00:00
## 2 2022-06-01… 20.6 93 19.5 0.3 0.3 0 2022-06-01 01:00:00
## 3 2022-06-01… 21 93 19.8 0 0 0 2022-06-01 02:00:00
## 4 2022-06-01… 20.8 93 19.7 0 0 0 2022-06-01 03:00:00
## 5 2022-06-01… 20.5 93 19.4 0 0 0 2022-06-01 04:00:00
## 6 2022-06-01… 19.8 95 19 0.7 0.7 0 2022-06-01 05:00:00
## 7 2022-06-01… 19.3 97 18.8 1.6 1.6 0 2022-06-01 06:00:00
## 8 2022-06-01… 19 97 18.4 1 1 0 2022-06-01 07:00:00
## 9 2022-06-01… 18.1 92 16.9 0.1 0.1 0 2022-06-01 08:00:00
## 10 2022-06-01… 16.8 87 14.6 0 0 0 2022-06-01 09:00:00
## # … with 8,942 more rows, 1 more variable: date <date>, and abbreviated
## # variable names ¹temperature_2m, ²relativehumidity_2m, ³dewpoint_2m,
## # ⁴precipitation, ⁵snowfall
# Create daily data tibble
jsonDaily <- jsonlite::read_json("tempOM", simplifyVector = TRUE)[["daily"]] %>%
tibble::as_tibble()
jsonDaily
## # A tibble: 373 × 4
## time precipitation_sum rain_sum snowfall_sum
## <chr> <dbl> <dbl> <dbl>
## 1 2022-06-01 3.8 3.8 0
## 2 2022-06-02 0 0 0
## 3 2022-06-03 0 0 0
## 4 2022-06-04 1.3 1.3 0
## 5 2022-06-05 0.3 0.3 0
## 6 2022-06-06 12.5 12.5 0
## 7 2022-06-07 2 2 0
## 8 2022-06-08 16 16 0
## 9 2022-06-09 0 0 0
## 10 2022-06-10 0.6 0.6 0
## # … with 363 more rows
# Extract other elements
jsonNames <- jsonlite::read_json("tempOM", simplifyVector = TRUE) %>% names
for (jsonName in jsonNames[!(jsonNames %in% c("daily", "hourly", "daily_units", "hourly_units"))]) {
cat("\n", jsonName, ":", jsonlite::read_json("tempOM", simplifyVector = TRUE)[[jsonName]])
}
##
## latitude : 41.8
## longitude : -87.6
## generationtime_ms : 2.892971
## utc_offset_seconds : -18000
## timezone : America/Chicago
## timezone_abbreviation : CDT
## elevation : 179
for (jsonName in jsonNames[jsonNames %in% c("daily_units", "hourly_units")]) {
cat("\n", jsonName, ":\n")
print(jsonlite::read_json("tempOM", simplifyVector = TRUE)[[jsonName]] %>% tibble::as_tibble() %>% t())
}
##
## hourly_units :
## [,1]
## time "iso8601"
## temperature_2m "°C"
## relativehumidity_2m "%"
## dewpoint_2m "°C"
## precipitation "mm"
## rain "mm"
## snowfall "cm"
##
## daily_units :
## [,1]
## time "iso8601"
## precipitation_sum "mm"
## rain_sum "mm"
## snowfall_sum "cm"
Daily data read from JSON and CSV are compared:
# Convert variable names in JSON daily data
jsonDailyProcess <- jsonDaily %>%
colRenamer(vecRename=c("precipitation_sum"="precip_mm",
"rain_sum"="rain_mm",
"snowfall_sum"="snow_cm",
"time"="date"
)
) %>%
mutate(date=as.Date(date))
jsonDailyProcess
## # A tibble: 373 × 4
## date precip_mm rain_mm snow_cm
## <date> <dbl> <dbl> <dbl>
## 1 2022-06-01 3.8 3.8 0
## 2 2022-06-02 0 0 0
## 3 2022-06-03 0 0 0
## 4 2022-06-04 1.3 1.3 0
## 5 2022-06-05 0.3 0.3 0
## 6 2022-06-06 12.5 12.5 0
## 7 2022-06-07 2 2 0
## 8 2022-06-08 16 16 0
## 9 2022-06-09 0 0 0
## 10 2022-06-10 0.6 0.6 0
## # … with 363 more rows
# Check dates included
omDailyProcess %>%
select(date) %>%
mutate(inCSV=1) %>%
full_join(mutate(select(jsonDailyProcess, "date"), inJSON=1), by="date") %>%
filter(!complete.cases(.))
## # A tibble: 8 × 3
## date inCSV inJSON
## <date> <dbl> <dbl>
## 1 2022-06-01 NA 1
## 2 2022-06-02 NA 1
## 3 2022-06-03 NA 1
## 4 2022-06-04 NA 1
## 5 2022-06-05 NA 1
## 6 2022-06-06 NA 1
## 7 2022-06-07 NA 1
## 8 2023-06-08 NA 1
# Check column names
all.equal(names(omDailyProcess), names(jsonDailyProcess))
## [1] TRUE
# Check data elements from 2022-06-08 through 2023-06-04 (last full day of data)
all.equal(omDailyProcess %>% tibble::as_tibble() %>% filter(date>="2022-06-08", date<="2023-06-04"),
jsonDailyProcess %>% filter(date>="2022-06-08", date<="2023-06-04")
)
## [1] TRUE
Hourly data read from JSON and CSV are compared:
# Convert variable names in JSON hourly data
jsonHourlyProcess <- jsonHourly %>%
select(-time) %>%
colRenamer(vecRename=c("temperature_2m"="temp2m_C",
"relativehumidity_2m"="relH2m",
"dewpoint_2m"="dew2m_C",
"precipitation"="precip_mm",
"rain"="rain_mm",
"snowfall"="snow_cm",
"tm"="time"
)
) %>%
select(time, everything())
jsonHourlyProcess
## # A tibble: 8,952 × 8
## time temp2…¹ relH2m dew2m_C preci…² rain_mm snow_cm date
## <dttm> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <date>
## 1 2022-06-01 00:00:00 21 92 19.6 0.1 0.1 0 2022-06-01
## 2 2022-06-01 01:00:00 20.6 93 19.5 0.3 0.3 0 2022-06-01
## 3 2022-06-01 02:00:00 21 93 19.8 0 0 0 2022-06-01
## 4 2022-06-01 03:00:00 20.8 93 19.7 0 0 0 2022-06-01
## 5 2022-06-01 04:00:00 20.5 93 19.4 0 0 0 2022-06-01
## 6 2022-06-01 05:00:00 19.8 95 19 0.7 0.7 0 2022-06-01
## 7 2022-06-01 06:00:00 19.3 97 18.8 1.6 1.6 0 2022-06-01
## 8 2022-06-01 07:00:00 19 97 18.4 1 1 0 2022-06-01
## 9 2022-06-01 08:00:00 18.1 92 16.9 0.1 0.1 0 2022-06-01
## 10 2022-06-01 09:00:00 16.8 87 14.6 0 0 0 2022-06-01
## # … with 8,942 more rows, and abbreviated variable names ¹temp2m_C, ²precip_mm
# Check dates included
omHourlyProcess %>%
count(date, name="nCSV") %>%
full_join(count(jsonHourlyProcess, date, name="nJSON"), by="date") %>%
filter(!complete.cases(.))
## # A tibble: 8 × 3
## date nCSV nJSON
## <date> <int> <int>
## 1 2022-06-01 NA 24
## 2 2022-06-02 NA 24
## 3 2022-06-03 NA 24
## 4 2022-06-04 NA 24
## 5 2022-06-05 NA 24
## 6 2022-06-06 NA 24
## 7 2022-06-07 NA 24
## 8 2023-06-08 NA 24
# Check column names
all.equal(names(omHourlyProcess), names(jsonHourlyProcess))
## [1] TRUE
# Check data elements from 2022-06-08 through 2023-06-04 (last full day of data)
all.equal(omHourlyProcess %>% tibble::as_tibble() %>% filter(date>="2022-06-08", date<="2023-06-04"),
jsonHourlyProcess %>% filter(date>="2022-06-08", date<="2023-06-04")
)
## [1] TRUE
Metrics that can be reuested for hourly and daily data include:
hourlyMetrics <- "temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm"
dailyMetrics <- "weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration"
hourlyDescription <- "Air temperature at 2 meters above ground\nRelative humidity at 2 meters above ground\nDew point temperature at 2 meters above ground\nApparent temperature is the perceived feels-like temperature combining wind chill factor, relative humidity and solar radiation\nAtmospheric air pressure reduced to mean sea level (msl) or pressure at surface. Typically pressure on mean sea level is used in meteorology. Surface pressure gets lower with increasing elevation.\nAtmospheric air pressure reduced to mean sea level (msl) or pressure at surface. Typically pressure on mean sea level is used in meteorology. Surface pressure gets lower with increasing elevation.\nTotal precipitation (rain, showers, snow) sum of the preceding hour. Data is stored with a 0.1 mm precision. If precipitation data is summed up to monthly sums, there might be small inconsistencies with the total precipitation amount.\nOnly liquid precipitation of the preceding hour including local showers and rain from large scale systems.\nSnowfall amount of the preceding hour in centimeters. For the water equivalent in millimeter, divide by 7. E.g. 7 cm snow = 10 mm precipitation water equivalent\nTotal cloud cover as an area fraction\nLow level clouds and fog up to 2 km altitude\nMid level clouds from 2 to 6 km altitude\nHigh level clouds from 6 km altitude\nShortwave solar radiation as average of the preceding hour. This is equal to the total global horizontal irradiation\nDirect solar radiation as average of the preceding hour on the horizontal plane and the normal plane (perpendicular to the sun)\nDirect solar radiation as average of the preceding hour on the horizontal plane and the normal plane (perpendicular to the sun)\nDiffuse solar radiation as average of the preceding hour\nWind speed at 10 or 100 meters above ground. Wind speed on 10 meters is the standard level.\nWind speed at 10 or 100 meters above ground. Wind speed on 10 meters is the standard level.\nWind direction at 10 or 100 meters above ground\nWind direction at 10 or 100 meters above ground\nGusts at 10 meters above ground of the indicated hour. Wind gusts in CERRA are defined as the maximum wind gusts of the preceding hour. Please consult the ECMWF IFS documentation for more information on how wind gusts are parameterized in weather models.\nET0 Reference Evapotranspiration of a well watered grass field. Based on FAO-56 Penman-Monteith equations ET0 is calculated from temperature, wind speed, humidity and solar radiation. Unlimited soil water is assumed. ET0 is commonly used to estimate the required irrigation for plants.\nWeather condition as a numeric code. Follow WMO weather interpretation codes. See table below for details. Weather code is calculated from cloud cover analysis, precipitation and snowfall. As barely no information about atmospheric stability is available, estimation about thunderstorms is not possible.\nVapor Pressure Deificit (VPD) in kilopascal (kPa). For high VPD (>1.6), water transpiration of plants increases. For low VPD (<0.4), transpiration decreases\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths."
dailyDescription <- "The most severe weather condition on a given day\nMaximum and minimum daily air temperature at 2 meters above ground\nMaximum and minimum daily air temperature at 2 meters above ground\nMaximum and minimum daily apparent temperature\nMaximum and minimum daily apparent temperature\nSum of daily precipitation (including rain, showers and snowfall)\nSum of daily rain\nSum of daily snowfall\nThe number of hours with rain\nSun rise and set times\nSun rise and set times\nMaximum wind speed and gusts on a day\nMaximum wind speed and gusts on a day\nDominant wind direction\nThe sum of solar radiaion on a given day in Megajoules\nDaily sum of ET0 Reference Evapotranspiration of a well watered grass field"
# Create tibble for hourly metrics
tblMetricsHourly <- tibble::tibble(metric=hourlyMetrics %>% str_split_1(","),
description=hourlyDescription %>% str_split_1("\n")
)
tblMetricsHourly %>%
print(n=50)
## # A tibble: 33 × 2
## metric description
## <chr> <chr>
## 1 temperature_2m Air temperature at 2 meters above ground
## 2 relativehumidity_2m Relative humidity at 2 meters above ground
## 3 dewpoint_2m Dew point temperature at 2 meters above ground
## 4 apparent_temperature Apparent temperature is the perceived feels-li…
## 5 pressure_msl Atmospheric air pressure reduced to mean sea l…
## 6 surface_pressure Atmospheric air pressure reduced to mean sea l…
## 7 precipitation Total precipitation (rain, showers, snow) sum …
## 8 rain Only liquid precipitation of the preceding hou…
## 9 snowfall Snowfall amount of the preceding hour in centi…
## 10 cloudcover Total cloud cover as an area fraction
## 11 cloudcover_low Low level clouds and fog up to 2 km altitude
## 12 cloudcover_mid Mid level clouds from 2 to 6 km altitude
## 13 cloudcover_high High level clouds from 6 km altitude
## 14 shortwave_radiation Shortwave solar radiation as average of the pr…
## 15 direct_radiation Direct solar radiation as average of the prece…
## 16 direct_normal_irradiance Direct solar radiation as average of the prece…
## 17 diffuse_radiation Diffuse solar radiation as average of the prec…
## 18 windspeed_10m Wind speed at 10 or 100 meters above ground. W…
## 19 windspeed_100m Wind speed at 10 or 100 meters above ground. W…
## 20 winddirection_10m Wind direction at 10 or 100 meters above ground
## 21 winddirection_100m Wind direction at 10 or 100 meters above ground
## 22 windgusts_10m Gusts at 10 meters above ground of the indicat…
## 23 et0_fao_evapotranspiration ET0 Reference Evapotranspiration of a well wat…
## 24 weathercode Weather condition as a numeric code. Follow WM…
## 25 vapor_pressure_deficit Vapor Pressure Deificit (VPD) in kilopascal (k…
## 26 soil_temperature_0_to_7cm Average temperature of different soil levels b…
## 27 soil_temperature_7_to_28cm Average temperature of different soil levels b…
## 28 soil_temperature_28_to_100cm Average temperature of different soil levels b…
## 29 soil_temperature_100_to_255cm Average temperature of different soil levels b…
## 30 soil_moisture_0_to_7cm Average soil water content as volumetric mixin…
## 31 soil_moisture_7_to_28cm Average soil water content as volumetric mixin…
## 32 soil_moisture_28_to_100cm Average soil water content as volumetric mixin…
## 33 soil_moisture_100_to_255cm Average soil water content as volumetric mixin…
# Create tibble for daily metrics
tblMetricsDaily <- tibble::tibble(metric=dailyMetrics %>% str_split_1(","),
description=dailyDescription %>% str_split_1("\n")
)
tblMetricsDaily
## # A tibble: 16 × 2
## metric description
## <chr> <chr>
## 1 weathercode The most severe weather condition on a given day
## 2 temperature_2m_max Maximum and minimum daily air temperature at 2 me…
## 3 temperature_2m_min Maximum and minimum daily air temperature at 2 me…
## 4 apparent_temperature_max Maximum and minimum daily apparent temperature
## 5 apparent_temperature_min Maximum and minimum daily apparent temperature
## 6 precipitation_sum Sum of daily precipitation (including rain, showe…
## 7 rain_sum Sum of daily rain
## 8 snowfall_sum Sum of daily snowfall
## 9 precipitation_hours The number of hours with rain
## 10 sunrise Sun rise and set times
## 11 sunset Sun rise and set times
## 12 windspeed_10m_max Maximum wind speed and gusts on a day
## 13 windgusts_10m_max Maximum wind speed and gusts on a day
## 14 winddirection_10m_dominant Dominant wind direction
## 15 shortwave_radiation_sum The sum of solar radiaion on a given day in Megaj…
## 16 et0_fao_evapotranspiration Daily sum of ET0 Reference Evapotranspiration of …
Data can then be assembled into a string that is compatible with the Open-Meteo API format:
openMeteoURLCreate <- function(mainURL="https://archive-api.open-meteo.com/v1/archive",
lat=45,
lon=-90,
startDate=paste(year(Sys.Date())-1, "01", "01", sep="-"),
endDate=paste(year(Sys.Date())-1, "12", "31", sep="-"),
hourlyMetrics=NULL,
dailyMetrics=NULL,
tz="GMT",
...
) {
# Create formatted string
fString <- paste0(mainURL,
"?latitude=",
lat,
"&longitude=",
lon,
"&start_date=",
startDate,
"&end_date=",
endDate
)
if(!is.null(hourlyMetrics)) fString <- paste0(fString, "&hourly=", hourlyMetrics)
if(!is.null(dailyMetrics)) fString <- paste0(fString, "&daily=", dailyMetrics)
# Return the formatted string
paste0(fString, "&timezone=", stringr::str_replace(tz, "/", "%2F"), ...)
}
# Blank example
openMeteoURLCreate()
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=45&longitude=-90&start_date=2022-01-01&end_date=2022-12-31&timezone=GMT"
# Matching previous CSV data pull
openMeteoURLCreate(lat=41.85,
lon=-87.65,
startDate="2022-06-01",
endDate="2023-06-08",
hourlyMetrics="temperature_2m,relativehumidity_2m,dewpoint_2m,precipitation,rain,snowfall",
dailyMetrics="precipitation_sum,rain_sum,snowfall_sum",
tz="America/Chicago"
)
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=41.85&longitude=-87.65&start_date=2022-06-01&end_date=2023-06-08&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,precipitation,rain,snowfall&daily=precipitation_sum,rain_sum,snowfall_sum&timezone=America%2FChicago"
A helper function is created to convert cities to lat/lon and to allow for selection of hourly and daily metrics by index number:
helperOpenMeteoURL <- function(cityName=NULL,
lat=NULL,
lon=NULL,
hourlyMetrics=NULL,
hourlyIndices=NULL,
hourlyDesc=tblMetricsHourly,
dailyMetrics=NULL,
dailyIndices=NULL,
dailyDesc=tblMetricsDaily,
startDate=NULL,
endDate=NULL,
tz=NULL,
...
) {
# Convert city to lat/lon if lat/lon are NULL
if(is.null(lat) | is.null(lon)) {
if(is.null(cityName)) stop("\nMust provide lat/lon or city name available in maps::us.cities\n")
cityData <- maps::us.cities %>% tibble::as_tibble() %>% filter(name==cityName)
if(nrow(cityData)!=1) stop("\nMust provide city name that maps uniquely to maps::us.cities$name\n")
lat <- cityData$lat[1]
lon <- cityData$long[1]
}
# Get hourly metrics by index if relevant
if(is.null(hourlyMetrics) & !is.null(hourlyIndices)) {
hourlyMetrics <- hourlyDesc %>% slice(hourlyIndices) %>% pull(metric)
hourlyMetrics <- paste0(hourlyMetrics, collapse=",")
cat("\nHourly metrics created from indices:", hourlyMetrics, "\n\n")
}
# Get daily metrics by index if relevant
if(is.null(dailyMetrics) & !is.null(dailyIndices)) {
dailyMetrics <- dailyDesc %>% slice(dailyIndices) %>% pull(metric)
dailyMetrics <- paste0(dailyMetrics, collapse=",")
cat("\nDaily metrics created from indices:", dailyMetrics, "\n\n")
}
# Use default values from OpenMeteoURLCreate() for startDate, endDate, and tz if passed as NULL
if(is.null(startDate)) startDate <- eval(formals(openMeteoURLCreate)$startDate)
if(is.null(endDate)) endDate <- eval(formals(openMeteoURLCreate)$endDate)
if(is.null(tz)) tz <- eval(formals(openMeteoURLCreate)$tz)
# Create and return URL
openMeteoURLCreate(lat=lat,
lon=lon,
startDate=startDate,
endDate=endDate,
hourlyMetrics=hourlyMetrics,
dailyMetrics=dailyMetrics,
tz=tz,
...
)
}
The URL is tested for file download, cached to avoid multiple hits to the server:
testURL <- helperOpenMeteoURL(cityName="Chicago IL",
hourlyIndices=c(1:3, 7:9),
dailyIndices=6:8,
startDate="2022-06-01",
endDate="2023-06-08",
tz="America/Chicago"
)
##
## Hourly metrics created from indices: temperature_2m,relativehumidity_2m,dewpoint_2m,precipitation,rain,snowfall
##
##
## Daily metrics created from indices: precipitation_sum,rain_sum,snowfall_sum
testURL
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=41.84&longitude=-87.68&start_date=2022-06-01&end_date=2023-06-08&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,precipitation,rain,snowfall&daily=precipitation_sum,rain_sum,snowfall_sum&timezone=America%2FChicago"
# Download file
if(!file.exists("notuse_testOM.json")) {
fileDownload(fileName="notuse_testOM.json", url=testURL)
} else {
cat("\nFile notuse_testOM.json already exists, skipping download\n")
}
## size isdir mode mtime ctime
## notuse_testOM.json 426872 FALSE 666 2023-06-19 08:56:49 2023-06-19 08:56:45
## atime exe
## notuse_testOM.json 2023-06-19 08:56:49 no
Code is created to read the JSON return object:
readOpenMeteoJSON <- function(js) {
# FUNCTION arguments:
# js: JSON list returned by download from Open-Meteo
# Get the object and names
jsObj <- jsonlite::read_json(js, simplifyVector = TRUE)
nms <- jsObj %>% names()
cat("\nObjects in JSON include:", paste(nms, collapse=", "), "\n\n")
# Set default objects as NULL
tblDaily <- NULL
tblHourly <- NULL
tblUnitsDaily <- NULL
tblUnitsHourly <- NULL
# Get daily and hourly as tibble if relevant
if("daily" %in% nms) tblDaily <- jsObj$daily %>% tibble::as_tibble()
if("hourly" %in% nms) tblHourly <- jsObj$hourly %>% tibble::as_tibble()
# Helper function for unit conversions
helperMetricUnit <- function(x, mapper, desc) {
x %>%
tibble::as_tibble() %>%
pivot_longer(cols=everything()) %>%
left_join(mapper, by=c("name"="metric")) %>%
mutate(value=stringr::str_replace(value, "\u00b0", "deg ")) %>%
mutate(metricType=desc) %>%
select(metricType, everything())
}
# Get the unit descriptions
if("daily_units" %in% nms)
tblUnitsDaily <- helperMetricUnit(jsObj$daily_units, tblMetricsDaily, desc="daily_units")
if("hourly_units" %in% nms)
tblUnitsHourly <- helperMetricUnit(jsObj$hourly_units, tblMetricsHourly, desc="hourly_units")
if(is.null(tblUnitsDaily) & !is.null(tblUnitsHourly)) tblUnits <- tblUnitsHourly
else if(!is.null(tblUnitsDaily) & is.null(tblUnitsHourly)) tblUnits <- tblUnitsDaily
else if(!is.null(tblUnitsDaily) & !is.null(tblUnitsHourly))
tblUnits <- bind_rows(tblUnitsHourly, tblUnitsDaily)
else tblUnits <- NULL
# Put everything else together
tblDescription <- jsObj[setdiff(nms, c("hourly", "hourly_units", "daily", "daily_units"))] %>%
tibble::as_tibble()
# Return the list objects
list(tblDaily=tblDaily, tblHourly=tblHourly, tblUnits=tblUnits, tblDescription=tblDescription)
}
prettyOpenMeteoMeta <- function(df, extr="tblDescription") {
if("list" %in% class(df)) df <- df[[extr]]
for(name in names(df)) {
cat("\n", name, ": ", df %>% pull(name), sep="")
}
cat("\n\n")
}
tmpOM <- readOpenMeteoJSON("notuse_testOM.json")
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly, daily_units, daily
tmpOM
## $tblDaily
## # A tibble: 373 × 4
## time precipitation_sum rain_sum snowfall_sum
## <chr> <dbl> <dbl> <dbl>
## 1 2022-06-01 3.8 3.8 0
## 2 2022-06-02 0 0 0
## 3 2022-06-03 0 0 0
## 4 2022-06-04 1.3 1.3 0
## 5 2022-06-05 0.3 0.3 0
## 6 2022-06-06 12.5 12.5 0
## 7 2022-06-07 2 2 0
## 8 2022-06-08 16 16 0
## 9 2022-06-09 0 0 0
## 10 2022-06-10 0.6 0.6 0
## # … with 363 more rows
##
## $tblHourly
## # A tibble: 8,952 × 7
## time temperature_2m relativehumid…¹ dewpo…² preci…³ rain snowf…⁴
## <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 2022-06-01T00:00 21 92 19.7 0.1 0.1 0
## 2 2022-06-01T01:00 20.6 94 19.6 0.3 0.3 0
## 3 2022-06-01T02:00 21.1 93 19.9 0 0 0
## 4 2022-06-01T03:00 20.8 93 19.7 0 0 0
## 5 2022-06-01T04:00 20.5 93 19.3 0 0 0
## 6 2022-06-01T05:00 19.7 95 19 0.7 0.7 0
## 7 2022-06-01T06:00 19.4 96 18.8 1.6 1.6 0
## 8 2022-06-01T07:00 19.2 96 18.5 1 1 0
## 9 2022-06-01T08:00 18.6 90 17 0.1 0.1 0
## 10 2022-06-01T09:00 17.7 84 14.9 0 0 0
## # … with 8,942 more rows, and abbreviated variable names ¹relativehumidity_2m,
## # ²dewpoint_2m, ³precipitation, ⁴snowfall
##
## $tblUnits
## # A tibble: 11 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 hourly_units time iso8601 <NA>
## 2 hourly_units temperature_2m deg C Air temperature at 2 meters above g…
## 3 hourly_units relativehumidity_2m % Relative humidity at 2 meters above…
## 4 hourly_units dewpoint_2m deg C Dew point temperature at 2 meters a…
## 5 hourly_units precipitation mm Total precipitation (rain, showers,…
## 6 hourly_units rain mm Only liquid precipitation of the pr…
## 7 hourly_units snowfall cm Snowfall amount of the preceding ho…
## 8 daily_units time iso8601 <NA>
## 9 daily_units precipitation_sum mm Sum of daily precipitation (includi…
## 10 daily_units rain_sum mm Sum of daily rain
## 11 daily_units snowfall_sum cm Sum of daily snowfall
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seco…¹ timez…² timez…³ eleva…⁴
## <dbl> <dbl> <dbl> <int> <chr> <chr> <dbl>
## 1 41.8 -87.7 2.66 -18000 Americ… CDT 180
## # … with abbreviated variable names ¹utc_offset_seconds, ²timezone,
## # ³timezone_abbreviation, ⁴elevation
prettyOpenMeteoMeta(tmpOM)
##
## latitude: 41.8
## longitude: -87.7
## generationtime_ms: 2.658963
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180
Conversion functions are written for hourly and daily data:
omProcessDaily <- function(tbl, extr="tblDaily") {
if("list" %in% class(tbl)) tbl <- tbl[[extr]]
tbl %>% mutate(date=lubridate::ymd(time)) %>% select(date, everything())
}
omProcessHourly <- function(tbl, extr="tblHourly") {
if("list" %in% class(tbl)) tbl <- tbl[[extr]]
tbl %>%
mutate(origTime=time,
time=lubridate::ymd_hm(time),
date=lubridate::date(time),
hour=lubridate::hour(time)
) %>%
select(time, date, hour, everything())
}
omProcessDaily(tmpOM)
## # A tibble: 373 × 5
## date time precipitation_sum rain_sum snowfall_sum
## <date> <chr> <dbl> <dbl> <dbl>
## 1 2022-06-01 2022-06-01 3.8 3.8 0
## 2 2022-06-02 2022-06-02 0 0 0
## 3 2022-06-03 2022-06-03 0 0 0
## 4 2022-06-04 2022-06-04 1.3 1.3 0
## 5 2022-06-05 2022-06-05 0.3 0.3 0
## 6 2022-06-06 2022-06-06 12.5 12.5 0
## 7 2022-06-07 2022-06-07 2 2 0
## 8 2022-06-08 2022-06-08 16 16 0
## 9 2022-06-09 2022-06-09 0 0 0
## 10 2022-06-10 2022-06-10 0.6 0.6 0
## # … with 363 more rows
omProcessHourly(tmpOM)
## # A tibble: 8,952 × 10
## time date hour temperat…¹ relat…² dewpo…³ preci…⁴ rain
## <dttm> <date> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2022-06-01 00:00:00 2022-06-01 0 21 92 19.7 0.1 0.1
## 2 2022-06-01 01:00:00 2022-06-01 1 20.6 94 19.6 0.3 0.3
## 3 2022-06-01 02:00:00 2022-06-01 2 21.1 93 19.9 0 0
## 4 2022-06-01 03:00:00 2022-06-01 3 20.8 93 19.7 0 0
## 5 2022-06-01 04:00:00 2022-06-01 4 20.5 93 19.3 0 0
## 6 2022-06-01 05:00:00 2022-06-01 5 19.7 95 19 0.7 0.7
## 7 2022-06-01 06:00:00 2022-06-01 6 19.4 96 18.8 1.6 1.6
## 8 2022-06-01 07:00:00 2022-06-01 7 19.2 96 18.5 1 1
## 9 2022-06-01 08:00:00 2022-06-01 8 18.6 90 17 0.1 0.1
## 10 2022-06-01 09:00:00 2022-06-01 9 17.7 84 14.9 0 0
## # … with 8,942 more rows, 2 more variables: snowfall <dbl>, origTime <chr>, and
## # abbreviated variable names ¹temperature_2m, ²relativehumidity_2m,
## # ³dewpoint_2m, ⁴precipitation
Function readOpenMeteoJSON() is updated to automatically incorporate date conversions:
readOpenMeteoJSON <- function(js, mapDaily=tblMetricsDaily, mapHourly=tblMetricsHourly) {
# FUNCTION arguments:
# js: JSON list returned by download from Open-Meteo
# mapDaily: mapping file for daily metrics
# mapHourly: mapping file for hourly metrics
# Get the object and names
jsObj <- jsonlite::read_json(js, simplifyVector = TRUE)
nms <- jsObj %>% names()
cat("\nObjects in JSON include:", paste(nms, collapse=", "), "\n\n")
# Set default objects as NULL
tblDaily <- NULL
tblHourly <- NULL
tblUnitsDaily <- NULL
tblUnitsHourly <- NULL
# Get daily and hourly as tibble if relevant
if("daily" %in% nms) tblDaily <- jsObj$daily %>% tibble::as_tibble() %>% omProcessDaily()
if("hourly" %in% nms) tblHourly <- jsObj$hourly %>% tibble::as_tibble() %>% omProcessHourly()
# Helper function for unit conversions
helperMetricUnit <- function(x, mapper, desc=NULL) {
if(is.null(desc))
desc <- as.list(match.call())$x %>%
deparse() %>%
stringr::str_replace_all(pattern=".*\\$", replacement="")
x %>%
tibble::as_tibble() %>%
pivot_longer(cols=everything()) %>%
left_join(mapper, by=c("name"="metric")) %>%
mutate(value=stringr::str_replace(value, "\u00b0", "deg ")) %>%
mutate(metricType=desc) %>%
select(metricType, everything())
}
# Get the unit descriptions
if("daily_units" %in% nms) tblUnitsDaily <- helperMetricUnit(jsObj$daily_units, mapDaily)
if("hourly_units" %in% nms) tblUnitsHourly <- helperMetricUnit(jsObj$hourly_units, mapHourly)
if(is.null(tblUnitsDaily) & !is.null(tblUnitsHourly)) tblUnits <- tblUnitsHourly
else if(!is.null(tblUnitsDaily) & is.null(tblUnitsHourly)) tblUnits <- tblUnitsDaily
else if(!is.null(tblUnitsDaily) & !is.null(tblUnitsHourly))
tblUnits <- bind_rows(tblUnitsHourly, tblUnitsDaily)
else tblUnits <- NULL
# Put everything else together
tblDescription <- jsObj[setdiff(nms, c("hourly", "hourly_units", "daily", "daily_units"))] %>%
tibble::as_tibble()
# Return the list objects
list(tblDaily=tblDaily, tblHourly=tblHourly, tblUnits=tblUnits, tblDescription=tblDescription)
}
tmpOM2 <- readOpenMeteoJSON("notuse_testOM.json")
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly, daily_units, daily
tmpOM2
## $tblDaily
## # A tibble: 373 × 5
## date time precipitation_sum rain_sum snowfall_sum
## <date> <chr> <dbl> <dbl> <dbl>
## 1 2022-06-01 2022-06-01 3.8 3.8 0
## 2 2022-06-02 2022-06-02 0 0 0
## 3 2022-06-03 2022-06-03 0 0 0
## 4 2022-06-04 2022-06-04 1.3 1.3 0
## 5 2022-06-05 2022-06-05 0.3 0.3 0
## 6 2022-06-06 2022-06-06 12.5 12.5 0
## 7 2022-06-07 2022-06-07 2 2 0
## 8 2022-06-08 2022-06-08 16 16 0
## 9 2022-06-09 2022-06-09 0 0 0
## 10 2022-06-10 2022-06-10 0.6 0.6 0
## # … with 363 more rows
##
## $tblHourly
## # A tibble: 8,952 × 10
## time date hour temperat…¹ relat…² dewpo…³ preci…⁴ rain
## <dttm> <date> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2022-06-01 00:00:00 2022-06-01 0 21 92 19.7 0.1 0.1
## 2 2022-06-01 01:00:00 2022-06-01 1 20.6 94 19.6 0.3 0.3
## 3 2022-06-01 02:00:00 2022-06-01 2 21.1 93 19.9 0 0
## 4 2022-06-01 03:00:00 2022-06-01 3 20.8 93 19.7 0 0
## 5 2022-06-01 04:00:00 2022-06-01 4 20.5 93 19.3 0 0
## 6 2022-06-01 05:00:00 2022-06-01 5 19.7 95 19 0.7 0.7
## 7 2022-06-01 06:00:00 2022-06-01 6 19.4 96 18.8 1.6 1.6
## 8 2022-06-01 07:00:00 2022-06-01 7 19.2 96 18.5 1 1
## 9 2022-06-01 08:00:00 2022-06-01 8 18.6 90 17 0.1 0.1
## 10 2022-06-01 09:00:00 2022-06-01 9 17.7 84 14.9 0 0
## # … with 8,942 more rows, 2 more variables: snowfall <dbl>, origTime <chr>, and
## # abbreviated variable names ¹temperature_2m, ²relativehumidity_2m,
## # ³dewpoint_2m, ⁴precipitation
##
## $tblUnits
## # A tibble: 11 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 hourly_units time iso8601 <NA>
## 2 hourly_units temperature_2m deg C Air temperature at 2 meters above g…
## 3 hourly_units relativehumidity_2m % Relative humidity at 2 meters above…
## 4 hourly_units dewpoint_2m deg C Dew point temperature at 2 meters a…
## 5 hourly_units precipitation mm Total precipitation (rain, showers,…
## 6 hourly_units rain mm Only liquid precipitation of the pr…
## 7 hourly_units snowfall cm Snowfall amount of the preceding ho…
## 8 daily_units time iso8601 <NA>
## 9 daily_units precipitation_sum mm Sum of daily precipitation (includi…
## 10 daily_units rain_sum mm Sum of daily rain
## 11 daily_units snowfall_sum cm Sum of daily snowfall
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seco…¹ timez…² timez…³ eleva…⁴
## <dbl> <dbl> <dbl> <int> <chr> <chr> <dbl>
## 1 41.8 -87.7 2.66 -18000 Americ… CDT 180
## # … with abbreviated variable names ¹utc_offset_seconds, ²timezone,
## # ³timezone_abbreviation, ⁴elevation
prettyOpenMeteoMeta(tmpOM2)
##
## latitude: 41.8
## longitude: -87.7
## generationtime_ms: 2.658963
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180
identical(tmpOM$tblUnits, tmpOM2$tblUnits)
## [1] TRUE
identical(tmpOM$tblDescription, tmpOM2$tblDescription)
## [1] TRUE
identical(tmpOM$tblDaily %>% omProcessDaily(), tmpOM2$tblDaily)
## [1] TRUE
identical(tmpOM$tblHourly %>% omProcessHourly(), tmpOM2$tblHourly)
## [1] TRUE
The daily data is tested for file download, cached to avoid multiple hits to the server:
testURLDaily <- helperOpenMeteoURL(cityName="Chicago IL",
dailyIndices=1:nrow(tblMetricsDaily),
startDate="2010-01-01",
endDate="2023-06-15",
tz="America/Chicago"
)
##
## Daily metrics created from indices: weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration
testURLDaily
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=41.84&longitude=-87.68&start_date=2010-01-01&end_date=2023-06-15&daily=weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration&timezone=America%2FChicago"
# Download file
if(!file.exists("notuse_testOM_daily.json")) {
fileDownload(fileName="notuse_testOM_daily.json", url=testURLDaily)
} else {
cat("\nFile notuse_testOM_daily.json already exists, skipping download\n")
}
## size isdir mode mtime
## notuse_testOM_daily.json 573218 FALSE 666 2023-06-23 07:53:04
## ctime atime exe
## notuse_testOM_daily.json 2023-06-23 07:52:59 2023-06-23 07:53:04 no
Data are read and stored as a list:
tmpOMDaily <- readOpenMeteoJSON("notuse_testOM_daily.json")
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily
tmpOMDaily
## $tblDaily
## # A tibble: 4,914 × 18
## date time weath…¹ tempe…² tempe…³ appar…⁴ appar…⁵ preci…⁶ rain_…⁷
## <date> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-01 2010-01-01 3 -8.6 -13.4 -14.9 -19.6 0 0
## 2 2010-01-02 2010-01-02 2 -10.4 -15.1 -17.5 -21.7 0 0
## 3 2010-01-03 2010-01-03 3 -7.9 -13.8 -13.6 -20.1 0 0
## 4 2010-01-04 2010-01-04 3 -6.9 -12.3 -12.8 -18.9 0 0
## 5 2010-01-05 2010-01-05 3 -4.8 -9.8 -10.1 -15.7 0 0
## 6 2010-01-06 2010-01-06 71 -4.9 -9 -9.2 -14.4 0 0
## 7 2010-01-07 2010-01-07 73 -5.2 -8.5 -9.3 -13 7.5 0
## 8 2010-01-08 2010-01-08 73 -3 -9.4 -9.2 -15.3 2.3 0
## 9 2010-01-09 2010-01-09 3 -5.8 -12.3 -10.8 -18.2 0 0
## 10 2010-01-10 2010-01-10 3 -8.8 -19.4 -16.2 -25.6 0 0
## # … with 4,904 more rows, 9 more variables: snowfall_sum <dbl>,
## # precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## # windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## # winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## # et0_fao_evapotranspiration <dbl>, and abbreviated variable names
## # ¹weathercode, ²temperature_2m_max, ³temperature_2m_min,
## # ⁴apparent_temperature_max, ⁵apparent_temperature_min, ⁶precipitation_sum, …
##
## $tblHourly
## NULL
##
## $tblUnits
## # A tibble: 17 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 daily_units time "iso8601" <NA>
## 2 daily_units weathercode "wmo code" The most severe weather co…
## 3 daily_units temperature_2m_max "deg C" Maximum and minimum daily …
## 4 daily_units temperature_2m_min "deg C" Maximum and minimum daily …
## 5 daily_units apparent_temperature_max "deg C" Maximum and minimum daily …
## 6 daily_units apparent_temperature_min "deg C" Maximum and minimum daily …
## 7 daily_units precipitation_sum "mm" Sum of daily precipitation…
## 8 daily_units rain_sum "mm" Sum of daily rain
## 9 daily_units snowfall_sum "cm" Sum of daily snowfall
## 10 daily_units precipitation_hours "h" The number of hours with r…
## 11 daily_units sunrise "iso8601" Sun rise and set times
## 12 daily_units sunset "iso8601" Sun rise and set times
## 13 daily_units windspeed_10m_max "km/h" Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max "km/h" Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg " Dominant wind direction
## 16 daily_units shortwave_radiation_sum "MJ/m²" The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm" Daily sum of ET0 Reference…
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seco…¹ timez…² timez…³ eleva…⁴
## <dbl> <dbl> <dbl> <int> <chr> <chr> <dbl>
## 1 41.8 -87.7 508. -18000 Americ… CDT 180
## # … with abbreviated variable names ¹utc_offset_seconds, ²timezone,
## # ³timezone_abbreviation, ⁴elevation
prettyOpenMeteoMeta(tmpOMDaily)
##
## latitude: 41.8
## longitude: -87.7
## generationtime_ms: 508.3281
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180
# Exploration of precipitation hours by day
tmpOMDaily$tblDaily %>% count(precipitation_hours) %>% print(n=30)
## # A tibble: 25 × 2
## precipitation_hours n
## <dbl> <int>
## 1 0 2499
## 2 1 287
## 3 2 288
## 4 3 231
## 5 4 180
## 6 5 201
## 7 6 152
## 8 7 157
## 9 8 123
## 10 9 121
## 11 10 98
## 12 11 90
## 13 12 74
## 14 13 53
## 15 14 70
## 16 15 41
## 17 16 54
## 18 17 48
## 19 18 36
## 20 19 31
## 21 20 22
## 22 21 11
## 23 22 15
## 24 23 18
## 25 24 14
tmpOMDaily$tblDaily %>%
filter(lubridate::year(date)<=2022) %>%
ggplot(aes(x=precipitation_hours)) +
geom_density(aes(group=lubridate::year(date), color=as.factor(lubridate::year(date)))) +
scale_color_discrete("Year") +
labs(title="Hours of Precipitation per Day", x="Hours of Precipitation", y="Annual density")
tmpOMDaily$tblDaily %>%
filter(lubridate::year(date)<=2022) %>%
ggplot(aes(x=precipitation_hours)) +
geom_histogram(aes(fill=as.factor(lubridate::year(date))), bins=25) +
scale_fill_discrete("Year") +
facet_wrap(~lubridate::year(date)) +
labs(title="Hours of Precipitation per Day", x="Hours of Precipitation", y="# Days")
Precipitation by month is explored:
dfPrecip <- tmpOMDaily$tblDaily %>%
filter(lubridate::year(date)<=2022) %>%
select(date, precipitation_sum, rain_sum, snowfall_sum) %>%
mutate(month=factor(month.abb[lubridate::month(date)], levels=month.abb),
yyyymm=customYYYYMM(date)
) %>%
group_by(yyyymm, month) %>%
summarize(across(where(is.numeric), sum), n=n(), .groups="drop")
dfPrecip
## # A tibble: 156 × 6
## yyyymm month precipitation_sum rain_sum snowfall_sum n
## <chr> <fct> <dbl> <dbl> <dbl> <int>
## 1 2010-01 Jan 25.9 12.6 10.8 31
## 2 2010-02 Feb 36.1 0.1 28.6 28
## 3 2010-03 Mar 58 47.7 7.21 31
## 4 2010-04 Apr 100. 100. 0 30
## 5 2010-05 May 154. 154. 0 31
## 6 2010-06 Jun 226. 226. 0 30
## 7 2010-07 Jul 145. 145. 0 31
## 8 2010-08 Aug 66.4 66.4 0 31
## 9 2010-09 Sep 104. 104. 0 30
## 10 2010-10 Oct 60.7 60.7 0 31
## # … with 146 more rows
# Boxplot of precipitation by month
dfPrecip %>%
select(-n) %>%
pivot_longer(-c(yyyymm, month)) %>%
ggplot(aes(x=month, y=ifelse(name=="snowfall_sum", 10*value, value))) +
geom_boxplot(fill="lightblue") +
facet_wrap(~name, scales="free_y") +
labs(title="Precipitation by month (2010-2022)", y="Precipitation (mm)", x=NULL) +
theme(axis.text.x = element_text(angle = 90)) +
lims(y=c(0, NA))
# Mean precipitation by month
dfPrecip %>%
group_by(month) %>%
summarize(across(where(is.numeric), mean)) %>%
ggplot(aes(x=month)) +
geom_col(aes(y=precipitation_sum), fill="green") +
geom_col(aes(y=rain_sum), fill="lightblue") +
geom_text(aes(y=rain_sum/2, label=round(rain_sum))) +
geom_text(aes(y=rain_sum/2 + precipitation_sum/2,
label=ifelse(precipitation_sum>rain_sum+3, round(precipitation_sum-rain_sum), "")
)
) +
geom_text(aes(y=precipitation_sum+5, label=round(precipitation_sum))) +
labs(x=NULL,
y="Precipitation (mm)",
title="Mean precipitation by month (2010-2022)",
subtitle="Light blue is mm falling as rain, green is liquid equivalent of other"
)
Average temperatures by month are also explored:
dfTemp <- tmpOMDaily$tblDaily %>%
filter(lubridate::year(date)<=2022) %>%
select(date,
temperature_2m_max,
temperature_2m_min,
apparent_temperature_max,
apparent_temperature_min
) %>%
mutate(month=factor(month.abb[lubridate::month(date)], levels=month.abb),
yyyymm=customYYYYMM(date)
) %>%
group_by(yyyymm, month) %>%
summarize(across(where(is.numeric), mean), n=n(), .groups="drop")
dfTemp
## # A tibble: 156 × 7
## yyyymm month temperature_2m_max temperature_2m_min apparent_…¹ appar…² n
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <int>
## 1 2010-01 Jan -2.46 -8.18 -7.64 -13.7 31
## 2 2010-02 Feb -0.414 -7.31 -4.85 -12.4 28
## 3 2010-03 Mar 7.67 -0.452 4.25 -5.13 31
## 4 2010-04 Apr 16.9 7.40 14.6 3.56 30
## 5 2010-05 May 20.5 13.1 20.5 11.3 31
## 6 2010-06 Jun 26.0 19.1 28.0 19.5 30
## 7 2010-07 Jul 29.1 21.7 32.0 23.5 31
## 8 2010-08 Aug 28.6 21.5 31.2 23.0 31
## 9 2010-09 Sep 22.7 16.0 22.1 14.8 30
## 10 2010-10 Oct 18.1 10.3 15.4 7.14 31
## # … with 146 more rows, and abbreviated variable names
## # ¹apparent_temperature_max, ²apparent_temperature_min
# Boxplot of precipitation by month
dfTemp %>%
select(-n) %>%
pivot_longer(-c(yyyymm, month)) %>%
ggplot(aes(x=month, y=value)) +
geom_boxplot(fill="lightblue") +
facet_wrap(~name) +
labs(title="Average temperature by month (2010-2022)", y="Average temperature (C)", x=NULL) +
theme(axis.text.x = element_text(angle = 90))
# Mean temperatures by month
dfTemp %>%
select(-n) %>%
group_by(month) %>%
summarize(across(where(is.numeric), mean)) %>%
pivot_longer(cols=-c(month)) %>%
mutate(measType=stringr::str_replace(name, ".*_", ""),
meas=ifelse(str_detect(name, "apparent"), "apparent", "actual")
) %>%
select(-name) %>%
pivot_wider(id_cols=c(month, meas), names_from="measType", values_from="value") %>%
ggplot(aes(x=month)) +
geom_tile(aes(y=(max+min)/2, height=max-min), width=0.5, fill="lightblue") +
geom_text(aes(y=max+1, label=round(max, 1)), size=2.5) +
geom_text(aes(y=min-1, label=round(min, 1)), size=2.5) +
labs(x=NULL,
y="Temperature (C)",
title="Mean high and low temperature by month (2010-2022)",
subtitle="Actual temperature and apparent temperature"
) +
facet_wrap(~meas)
Sunrise and sunset times are explored:
dfSun <- tmpOMDaily$tblDaily %>%
filter(lubridate::year(date)<=2022) %>%
select(date, sunrise, sunset) %>%
mutate(month=factor(month.abb[lubridate::month(date)], levels=month.abb),
yyyymm=customYYYYMM(date),
across(c(sunrise, sunset), lubridate::ymd_hm),
sr=hms::as_hms(sunrise),
ss=hms::as_hms(sunset),
doy=lubridate::yday(date),
year=lubridate::year(date)
)
dfSun
## # A tibble: 4,748 × 9
## date sunrise sunset month yyyymm sr ss
## <date> <dttm> <dttm> <fct> <chr> <time> <tim>
## 1 2010-01-01 2010-01-01 08:16:00 2010-01-01 17:32:00 Jan 2010-01 08:16 17:32
## 2 2010-01-02 2010-01-02 08:16:00 2010-01-02 17:33:00 Jan 2010-01 08:16 17:33
## 3 2010-01-03 2010-01-03 08:16:00 2010-01-03 17:34:00 Jan 2010-01 08:16 17:34
## 4 2010-01-04 2010-01-04 08:16:00 2010-01-04 17:34:00 Jan 2010-01 08:16 17:34
## 5 2010-01-05 2010-01-05 08:16:00 2010-01-05 17:35:00 Jan 2010-01 08:16 17:35
## 6 2010-01-06 2010-01-06 08:16:00 2010-01-06 17:36:00 Jan 2010-01 08:16 17:36
## 7 2010-01-07 2010-01-07 08:16:00 2010-01-07 17:37:00 Jan 2010-01 08:16 17:37
## 8 2010-01-08 2010-01-08 08:16:00 2010-01-08 17:38:00 Jan 2010-01 08:16 17:38
## 9 2010-01-09 2010-01-09 08:16:00 2010-01-09 17:39:00 Jan 2010-01 08:16 17:39
## 10 2010-01-10 2010-01-10 08:15:00 2010-01-10 17:40:00 Jan 2010-01 08:15 17:40
## # … with 4,738 more rows, and 2 more variables: doy <dbl>, year <dbl>
# Plot of sunrise and sunset by day of year
dfSun %>%
select(date, month, year, doy, sr, ss) %>%
ggplot(aes(x=doy, group=factor(year), color=factor(year))) +
geom_line(aes(y=sr)) +
geom_line(aes(y=ss)) +
geom_line(aes(y=(ss+sr)/2)) +
labs(x="Day of year", y="Time (always on DST)", title="Sunrise, sunset, and solar noon by day of year") +
scale_color_discrete("Year")
# Plot of minutes gained from earliest/latest
dfSun %>%
select(date, month, year, doy, sr, ss) %>%
group_by(year) %>%
mutate(dsr=max(sr)-sr, dss=ss-min(ss)) %>%
ungroup() %>%
rename(sunrise_change=dsr, sunset_change=dss) %>%
pivot_longer(cols=c(sunrise_change, sunset_change)) %>%
ggplot(aes(x=doy)) +
geom_point(aes(y=as.numeric(value)/60, color=name), size=0.5) +
labs(x="Day of year", y="Minutes", title="Delta from latest sunrise / earliest sunset") +
scale_color_discrete("Metric")
Wind data is explored:
dfWind <- tmpOMDaily$tblDaily %>%
select(date,
dir=winddirection_10m_dominant,
spd=windspeed_10m_max,
gst=windgusts_10m_max
) %>%
mutate(month=lubridate::month(date),
year=lubridate::year(date),
dir10=round(dir/10)*10,
spd5=round(spd/5)*5,
gst5=round(gst/5)*5
)
dfWind
## # A tibble: 4,914 × 9
## date dir spd gst month year dir10 spd5 gst5
## <date> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-01 291 23.4 38.2 1 2010 290 25 40
## 2 2010-01-02 309 24.3 40.3 1 2010 310 25 40
## 3 2010-01-03 313 21.6 35.6 1 2010 310 20 35
## 4 2010-01-04 304 21 35.3 1 2010 300 20 35
## 5 2010-01-05 298 19.8 33.1 1 2010 300 20 35
## 6 2010-01-06 280 16.4 27 1 2010 280 15 25
## 7 2010-01-07 240 16.3 29.5 1 2010 240 15 30
## 8 2010-01-08 334 27.6 45 1 2010 330 30 45
## 9 2010-01-09 305 17.2 28.1 1 2010 300 15 30
## 10 2010-01-10 226 27.9 46.4 1 2010 230 30 45
## # … with 4,904 more rows
# Plot of wind direction and speed
dfWind %>%
ggplot(aes(x=dir, y=spd)) +
geom_point(alpha=0.2, size=0.5) +
coord_polar() +
facet_wrap(~factor(month.abb[month], levels=month.abb), nrow=2) +
geom_vline(xintercept=c(0, 90, 180, 270), lty=2, color="red") +
labs(title="Maximum wind speed and predominant direction (measured daily)",
y="Maximum Wind speed (km/h)",
x="Predominant Wind direction"
) +
scale_x_continuous(breaks=c(0, 90, 180, 270))
dfWind %>%
filter(lubridate::year(date)<=2022) %>%
count(month, dir10, spd5) %>%
ggplot(aes(x=dir10, y=spd5)) +
geom_point(aes(size=n), alpha=0.2) +
coord_polar() +
facet_wrap(~factor(month.abb[month], levels=month.abb)) +
geom_vline(xintercept=c(0, 90, 180, 270), lty=2, color="red") +
labs(title="Maximum wind speed and predominant direction (measured daily)",
subtitle="Wind speed rounded to nearest 5 km/h, wind direction rounded to nearest 10 degrees",
y="Maximum Wind speed (km/h)",
x="Predominant Wind direction"
) +
scale_x_continuous(breaks=c(0, 90, 180, 270))
# Plot of predominant wind direction
dfWind %>%
ggplot(aes(x=dir10)) +
geom_histogram(binwidth=10) +
facet_wrap(~factor(month.abb[month], levels=month.abb)) +
geom_vline(xintercept=c(0, 90, 180, 270, 360), lty=2, color="red") +
labs(title="Predominant wind direction (measured daily)",
y="# Days",
x="Predominant wind direction (rounded to nearest 10 degrees)"
) +
scale_x_continuous(breaks=c(0, 90, 180, 270, 360))
# Plot of maximum wind speed
dfWind %>%
ggplot(aes(x=spd5)) +
geom_histogram(binwidth=5) +
facet_wrap(~factor(month.abb[month], levels=month.abb)) +
labs(title="Maximum wind speed (measured daily)",
y="# Days",
x="Maximum wind speed (km/h, rounded to nearest 5 km/h)"
)
# Mean maximum wind speed by month
dfWind %>%
filter(year<=2022) %>%
select(date, month, year, spd, gst) %>%
pivot_longer(cols=-c(date, month, year)) %>%
ggplot(aes(x=factor(month.abb[month], levels=month.abb), y=value)) +
geom_boxplot(fill="lightblue") +
facet_wrap(~c("gst"="2. Maximum wind gust", "spd"="1. Maximum wind speed")[name]) +
labs(title="Wind speed measured daily (2010-2022)", y="Wind speed (km/h)", x=NULL) +
theme(axis.text.x = element_text(angle = 90)) +
lims(y=c(0, NA))
Weather codes, radiation, and evapotranspiration are explored:
dfOther <- tmpOMDaily$tblDaily %>%
select(date, wc=weathercode, sw=shortwave_radiation_sum, et=et0_fao_evapotranspiration) %>%
mutate(wc=factor(wc, levels=sort(unique(wc))),
year=lubridate::year(date),
month=factor(month.abb[lubridate::month(date)], levels=month.abb),
yyyymm=customYYYYMM(date)
)
dfOther
## # A tibble: 4,914 × 7
## date wc sw et year month yyyymm
## <date> <fct> <dbl> <dbl> <dbl> <fct> <chr>
## 1 2010-01-01 3 6.94 0.53 2010 Jan 2010-01
## 2 2010-01-02 2 7.91 0.49 2010 Jan 2010-01
## 3 2010-01-03 3 5.62 0.46 2010 Jan 2010-01
## 4 2010-01-04 3 5.09 0.48 2010 Jan 2010-01
## 5 2010-01-05 3 6.61 0.52 2010 Jan 2010-01
## 6 2010-01-06 71 7.47 0.48 2010 Jan 2010-01
## 7 2010-01-07 73 3.82 0.29 2010 Jan 2010-01
## 8 2010-01-08 73 6.47 0.53 2010 Jan 2010-01
## 9 2010-01-09 3 6.22 0.38 2010 Jan 2010-01
## 10 2010-01-10 3 8.99 0.35 2010 Jan 2010-01
## # … with 4,904 more rows
# Histogram of weather code
dfOther %>%
filter(year<=2022) %>%
ggplot(aes(x=wc)) +
geom_bar() +
facet_wrap(~month) +
labs(title="Weather codes by month (2010-2022)", y="Count", x="Weather code") +
theme(axis.text.x = element_text(angle = 90))
# Mean radiation and evapotranspiration by month
dfOther %>%
select(-year) %>%
group_by(month) %>%
summarize(across(where(is.numeric), mean)) %>%
pivot_longer(cols=-c(month)) %>%
ggplot(aes(x=month)) +
geom_point(aes(y=value)) +
geom_line(aes(y=value, group=1)) +
labs(x=NULL,
y=NULL,
title="Mean radiation and evapotranspiration by month (2010-2022)",
subtitle="Evapotranspiration (mm) and Radiation (MegaJoules)"
) +
facet_wrap(~c("et"="Evapotranspiration (mm)", "sw"="Radiation (MJ)")[name], scales="free_y") +
lims(y=c(0, NA))
# Boxplot for radiation and evapotranspiration by month
dfOther %>%
select(date, month, sw, et) %>%
pivot_longer(-c(date, month)) %>%
ggplot(aes(x=month)) +
geom_boxplot(aes(y=value), fill="lightblue") +
labs(x=NULL,
y=NULL,
title="Daily radiation and evapotranspiration (2010-2022)",
subtitle="Evapotranspiration (mm) and Radiation (MegaJoules)"
) +
facet_wrap(~c("et"="Evapotranspiration (mm)", "sw"="Radiation (MJ)")[name], scales="free_y") +
lims(y=c(0, NA))
The hourly data is tested for file download, cached to avoid multiple hits to the server:
testURLHourly <- helperOpenMeteoURL(cityName="Chicago IL",
hourlyIndices=1:nrow(tblMetricsHourly),
startDate="2010-01-01",
endDate="2023-06-15",
tz="America/Chicago"
)
##
## Hourly metrics created from indices: temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm
testURLHourly
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=41.84&longitude=-87.68&start_date=2010-01-01&end_date=2023-06-15&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm&timezone=America%2FChicago"
# Download file
if(!file.exists("notuse_testOM_hourly.json")) {
fileDownload(fileName="notuse_testOM_hourly.json", url=testURLHourly)
} else {
cat("\nFile notuse_testOM_hourly.json already exists, skipping download\n")
}
## size isdir mode mtime
## notuse_testOM_hourly.json 20178300 FALSE 666 2023-06-30 08:03:13
## ctime atime exe
## notuse_testOM_hourly.json 2023-06-30 08:02:50 2023-06-30 08:03:13 no
Data are read and stored as a list:
tmpOMHourly <- readOpenMeteoJSON("notuse_testOM_hourly.json")
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly
tmpOMHourly
## $tblDaily
## NULL
##
## $tblHourly
## # A tibble: 117,936 × 37
## time date hour temper…¹ relat…² dewpo…³ appar…⁴ press…⁵
## <dttm> <date> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2010-01-01 00:00:00 2010-01-01 0 -9.5 67 -14.4 -15.8 1024.
## 2 2010-01-01 01:00:00 2010-01-01 1 -9.8 69 -14.4 -16.3 1025.
## 3 2010-01-01 02:00:00 2010-01-01 2 -10.3 73 -14.2 -16.8 1025.
## 4 2010-01-01 03:00:00 2010-01-01 3 -10.8 74 -14.5 -17.2 1026.
## 5 2010-01-01 04:00:00 2010-01-01 4 -11.3 75 -14.8 -17.7 1026.
## 6 2010-01-01 05:00:00 2010-01-01 5 -11.8 76 -15.1 -18.2 1026.
## 7 2010-01-01 06:00:00 2010-01-01 6 -12.3 77 -15.5 -18.6 1027.
## 8 2010-01-01 07:00:00 2010-01-01 7 -12.8 78 -15.8 -19 1028.
## 9 2010-01-01 08:00:00 2010-01-01 8 -13.2 79 -16.1 -19.4 1028.
## 10 2010-01-01 09:00:00 2010-01-01 9 -13.4 78 -16.3 -19.6 1028.
## # … with 117,926 more rows, 29 more variables: surface_pressure <dbl>,
## # precipitation <dbl>, rain <dbl>, snowfall <dbl>, cloudcover <int>,
## # cloudcover_low <int>, cloudcover_mid <int>, cloudcover_high <int>,
## # shortwave_radiation <dbl>, direct_radiation <dbl>,
## # direct_normal_irradiance <dbl>, diffuse_radiation <dbl>,
## # windspeed_10m <dbl>, windspeed_100m <dbl>, winddirection_10m <int>,
## # winddirection_100m <int>, windgusts_10m <dbl>, …
##
## $tblUnits
## # A tibble: 34 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 hourly_units time iso8601 <NA>
## 2 hourly_units temperature_2m deg C Air temperature at 2 meters above …
## 3 hourly_units relativehumidity_2m % Relative humidity at 2 meters abov…
## 4 hourly_units dewpoint_2m deg C Dew point temperature at 2 meters …
## 5 hourly_units apparent_temperature deg C Apparent temperature is the percei…
## 6 hourly_units pressure_msl hPa Atmospheric air pressure reduced t…
## 7 hourly_units surface_pressure hPa Atmospheric air pressure reduced t…
## 8 hourly_units precipitation mm Total precipitation (rain, showers…
## 9 hourly_units rain mm Only liquid precipitation of the p…
## 10 hourly_units snowfall cm Snowfall amount of the preceding h…
## # … with 24 more rows
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seco…¹ timez…² timez…³ eleva…⁴
## <dbl> <dbl> <dbl> <int> <chr> <chr> <dbl>
## 1 41.8 -87.7 6370. -18000 Americ… CDT 180
## # … with abbreviated variable names ¹utc_offset_seconds, ²timezone,
## # ³timezone_abbreviation, ⁴elevation
prettyOpenMeteoMeta(tmpOMHourly)
##
## latitude: 41.8
## longitude: -87.7
## generationtime_ms: 6369.988
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180
Consistency of data between daily and hourly is explored:
# Variables where maximum of hourly should be created
vrblMax <- c("weathercode", "temperature_2m", "apparent_temperature", "windspeed_10m", "windgusts_10m")
# Variables where minimum of hourly should be created
vrblMin <- c("temperature_2m", "apparent_temperature")
# Variables where sum of hourly should be created
vrblSum <- c("precipitation", "rain", "snowfall", "shortwave_radiation", "et0_fao_evapotranspiration")
# Variables in daily not to explore
# date, time, sunrise, sunset
# Variables that require a different approach
# winddirection_10m_dominant, precipitation_hours
# Check that all variables are included in hourly data
c(vrblMax, vrblMin, vrblSum) %in% (tmpOMHourly$tblHourly %>% names)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Create daily data from hourly
dfDailyFromHourly <- tmpOMHourly$tblHourly %>%
group_by(date) %>%
summarize(across(.cols=all_of(vrblMax), .fns=max, .names="{.col}_max"),
across(.cols=all_of(vrblMin), .fns=min, .names="{.col}_min"),
across(.cols=all_of(vrblSum), .fns=sum, .names="{.col}_sum"),
precipitation_hours=sum(precipitation>0)
) %>%
rename(weathercode=weathercode_max, et0_fao_evapotranspiration=et0_fao_evapotranspiration_sum)
dfDailyFromHourly
## # A tibble: 4,914 × 14
## date weatherc…¹ tempe…² appar…³ winds…⁴ windg…⁵ tempe…⁶ appar…⁷ preci…⁸
## <date> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-01 3 -8.6 -14.9 23.4 38.2 -13.4 -19.6 0
## 2 2010-01-02 2 -10.4 -17.5 24.3 40.3 -15.1 -21.7 0
## 3 2010-01-03 3 -7.9 -13.6 21.6 35.6 -13.8 -20.1 0
## 4 2010-01-04 3 -6.9 -12.8 21 35.3 -12.3 -18.9 0
## 5 2010-01-05 3 -4.8 -10.1 19.8 33.1 -9.8 -15.7 0
## 6 2010-01-06 71 -4.9 -9.2 16.4 27 -9 -14.4 0
## 7 2010-01-07 73 -5.2 -9.3 16.3 29.5 -8.5 -13 7.5
## 8 2010-01-08 73 -3 -9.2 27.6 45 -9.4 -15.3 2.3
## 9 2010-01-09 3 -5.8 -10.8 17.2 28.1 -12.3 -18.2 0
## 10 2010-01-10 3 -8.8 -16.2 27.9 46.4 -19.4 -25.6 0
## # … with 4,904 more rows, 5 more variables: rain_sum <dbl>, snowfall_sum <dbl>,
## # shortwave_radiation_sum <dbl>, et0_fao_evapotranspiration <dbl>,
## # precipitation_hours <int>, and abbreviated variable names ¹weathercode,
## # ²temperature_2m_max, ³apparent_temperature_max, ⁴windspeed_10m_max,
## # ⁵windgusts_10m_max, ⁶temperature_2m_min, ⁷apparent_temperature_min,
## # ⁸precipitation_sum
names(dfDailyFromHourly)
## [1] "date" "weathercode"
## [3] "temperature_2m_max" "apparent_temperature_max"
## [5] "windspeed_10m_max" "windgusts_10m_max"
## [7] "temperature_2m_min" "apparent_temperature_min"
## [9] "precipitation_sum" "rain_sum"
## [11] "snowfall_sum" "shortwave_radiation_sum"
## [13] "et0_fao_evapotranspiration" "precipitation_hours"
names(dfDailyFromHourly) %in% names(tmpOMDaily$tblDaily)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Check data consistency
for (colName in names(dfDailyFromHourly)) {
cat("\n",
colName,
":",
all.equal(dfDailyFromHourly %>% pull(colName), tmpOMDaily$tblDaily %>% pull(colName))
)
}
##
## date : TRUE
## weathercode : TRUE
## temperature_2m_max : TRUE
## apparent_temperature_max : TRUE
## windspeed_10m_max : TRUE
## windgusts_10m_max : TRUE
## temperature_2m_min : TRUE
## apparent_temperature_min : TRUE
## precipitation_sum : TRUE
## rain_sum : TRUE
## snowfall_sum : TRUE
## shortwave_radiation_sum : Mean relative difference: 0.9964
## et0_fao_evapotranspiration : TRUE
## precipitation_hours : TRUE
# Plot for differences in radiation
dfRadiation <- dfDailyFromHourly %>%
select(date, shortwave_radiation_sum) %>%
bind_rows(select(tmpOMDaily$tblDaily, date, shortwave_radiation_sum), .id="src") %>%
mutate(src=c("1"="Daily from Hourly", "2"="Daily as Reported")[src])
dfRadiation %>%
ggplot(aes(x=date, y=shortwave_radiation_sum)) +
geom_line(aes(group=src, color=src)) +
labs(x=NULL, y="Sum of radiation", title="Comparison of shortwave radiation by day by source")
# Exploration of units
tmpOMDaily$tblUnits %>% filter(name=="shortwave_radiation_sum")
## # A tibble: 1 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 daily_units shortwave_radiation_sum MJ/m² The sum of solar radiaion on a give…
tmpOMHourly$tblUnits %>% filter(name=="shortwave_radiation")
## # A tibble: 1 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 hourly_units shortwave_radiation W/m² Shortwave solar radiation as average o…
# Conversion of Watts per hour to MegaJoules
# 0.0036 megajoules/watt-hour
dfRadiation %>%
ggplot(aes(x=date, y=ifelse(src=="Daily from Hourly", 0.0036, 1)*shortwave_radiation_sum)) +
geom_line(aes(group=src, color=src)) +
labs(x=NULL,
y="Sum of radiation",
title="Comparison of shortwave radiation by day by source",
subtitle="Summed from hourly multiplied by 0.0036 to convert Watt-hours to MegaJoules"
)
dfRadiation %>%
pivot_wider(id_cols="date", names_from="src", values_from="shortwave_radiation_sum") %>%
mutate(rat=`Daily as Reported`/`Daily from Hourly`) %>%
summary()
## date Daily from Hourly Daily as Reported rat
## Min. :2010-01-01 Min. : 135 Min. : 0.49 Min. :0.003585
## 1st Qu.:2013-05-13 1st Qu.:2304 1st Qu.: 8.29 1st Qu.:0.003599
## Median :2016-09-22 Median :4062 Median :14.62 Median :0.003600
## Mean :2016-09-22 Mean :4190 Mean :15.08 Mean :0.003600
## 3rd Qu.:2020-02-02 3rd Qu.:6056 3rd Qu.:21.80 3rd Qu.:0.003601
## Max. :2023-06-15 Max. :8788 Max. :31.64 Max. :0.003630
With the exception of radiation (reported in different units causing slight rounding differences), the reported daily data matches the expected aggregate of the reported hourly data
Precipitation by hour is explored:
# Hourly precipitation data
dfHourlyPrecip <- tmpOMHourly$tblHourly %>%
select(time, hour, precipitation, snowfall, rain) %>%
mutate(year=lubridate::year(time),
month=factor(month.abb[lubridate::month(time)], levels=month.abb)
) %>%
pivot_longer(cols=-c(time, year, month, hour))
dfHourlyPrecip
## # A tibble: 353,808 × 6
## time hour year month name value
## <dttm> <int> <dbl> <fct> <chr> <dbl>
## 1 2010-01-01 00:00:00 0 2010 Jan precipitation 0
## 2 2010-01-01 00:00:00 0 2010 Jan snowfall 0
## 3 2010-01-01 00:00:00 0 2010 Jan rain 0
## 4 2010-01-01 01:00:00 1 2010 Jan precipitation 0
## 5 2010-01-01 01:00:00 1 2010 Jan snowfall 0
## 6 2010-01-01 01:00:00 1 2010 Jan rain 0
## 7 2010-01-01 02:00:00 2 2010 Jan precipitation 0
## 8 2010-01-01 02:00:00 2 2010 Jan snowfall 0
## 9 2010-01-01 02:00:00 2 2010 Jan rain 0
## 10 2010-01-01 03:00:00 3 2010 Jan precipitation 0
## # … with 353,798 more rows
# Nil precipitation percent
dfNilPrecip <- dfHourlyPrecip %>%
group_by(month, name) %>%
summarize(pctNil=mean(value==0), .groups="drop")
dfNilPrecip
## # A tibble: 36 × 3
## month name pctNil
## <fct> <chr> <dbl>
## 1 Jan precipitation 0.845
## 2 Jan rain 0.925
## 3 Jan snowfall 0.877
## 4 Feb precipitation 0.845
## 5 Feb rain 0.938
## 6 Feb snowfall 0.865
## 7 Mar precipitation 0.851
## 8 Mar rain 0.884
## 9 Mar snowfall 0.946
## 10 Apr precipitation 0.807
## # … with 26 more rows
# Graphs of precipitation amount by month
for(metric in unique(dfHourlyPrecip$name)) {
p1 <- dfHourlyPrecip %>%
filter(name==metric, value>0, year<=2022) %>%
ggplot() +
geom_histogram(aes(x=value), bins = 50) +
facet_wrap(~month) +
labs(x=NULL,
y=NULL,
title=paste0(metric,
": hourly total (",
tmpOMHourly$tblUnits %>% filter(name==metric) %>% pull(value),
") from 2010-2022"
)
) +
geom_text(data=dfNilPrecip %>% filter(name==metric),
aes(x=Inf,
y=Inf,
label=paste0("Excludes ", round(100*pctNil, 1), "%\nof observations at 0")
),
size=2.5,
hjust=1,
vjust=1
)
print(p1)
}
Temperature by hour is explored:
# Hourly temperature data
dfHourlyTemp <- tmpOMHourly$tblHourly %>%
select(time, hour, temperature_2m, apparent_temperature, dewpoint_2m) %>%
mutate(year=lubridate::year(time),
month=factor(month.abb[lubridate::month(time)], levels=month.abb)
) %>%
pivot_longer(cols=-c(time, year, month, hour))
dfHourlyTemp
## # A tibble: 353,808 × 6
## time hour year month name value
## <dttm> <int> <dbl> <fct> <chr> <dbl>
## 1 2010-01-01 00:00:00 0 2010 Jan temperature_2m -9.5
## 2 2010-01-01 00:00:00 0 2010 Jan apparent_temperature -15.8
## 3 2010-01-01 00:00:00 0 2010 Jan dewpoint_2m -14.4
## 4 2010-01-01 01:00:00 1 2010 Jan temperature_2m -9.8
## 5 2010-01-01 01:00:00 1 2010 Jan apparent_temperature -16.3
## 6 2010-01-01 01:00:00 1 2010 Jan dewpoint_2m -14.4
## 7 2010-01-01 02:00:00 2 2010 Jan temperature_2m -10.3
## 8 2010-01-01 02:00:00 2 2010 Jan apparent_temperature -16.8
## 9 2010-01-01 02:00:00 2 2010 Jan dewpoint_2m -14.2
## 10 2010-01-01 03:00:00 3 2010 Jan temperature_2m -10.8
## # … with 353,798 more rows
# Graphs of precipitation amount by month
for(metric in unique(dfHourlyTemp$name)) {
p1 <- dfHourlyTemp %>%
filter(name==metric, year<=2022) %>%
ggplot() +
geom_boxplot(aes(x=factor(hour), y=value), fill = "lightblue") +
facet_wrap(~month) +
labs(x=NULL,
y=NULL,
title=paste0(metric,
": hourly boxplot (",
tmpOMHourly$tblUnits %>% filter(name==metric) %>% pull(value),
") from 2010-2022"
)
)
print(p1)
}
# Spread of temperature by day
dfHourlyTemp %>%
mutate(date=lubridate::date(time)) %>%
group_by(year, month, date, name) %>%
summarize(maxValue=max(value), minValue=min(value), mdnValue=median(value), .groups="drop") %>%
mutate(spd=maxValue-minValue) %>%
group_by(month, name) %>%
summarize(across(where(is.numeric), mean), .groups="drop") %>%
ggplot(aes(x=fct_rev(month), y=spd)) +
geom_point() +
coord_flip() +
facet_wrap(~name) +
labs(title="Average high/low spread of key metrics by month (deg C)", x=NULL, y="deg C") +
lims(y=c(0, NA))
Hours with maximum/minimum temperature and precipitation are explored:
# Create temperature and precipitation data
dfHourlyTempPrecip <- dfHourlyTemp %>%
bind_rows(dfHourlyPrecip) %>%
mutate(date=lubridate::date(time)) %>%
arrange(time, name)
dfHourlyTempPrecip
## # A tibble: 707,616 × 7
## time hour year month name value date
## <dttm> <int> <dbl> <fct> <chr> <dbl> <date>
## 1 2010-01-01 00:00:00 0 2010 Jan apparent_temperature -15.8 2010-01-01
## 2 2010-01-01 00:00:00 0 2010 Jan dewpoint_2m -14.4 2010-01-01
## 3 2010-01-01 00:00:00 0 2010 Jan precipitation 0 2010-01-01
## 4 2010-01-01 00:00:00 0 2010 Jan rain 0 2010-01-01
## 5 2010-01-01 00:00:00 0 2010 Jan snowfall 0 2010-01-01
## 6 2010-01-01 00:00:00 0 2010 Jan temperature_2m -9.5 2010-01-01
## 7 2010-01-01 01:00:00 1 2010 Jan apparent_temperature -16.3 2010-01-01
## 8 2010-01-01 01:00:00 1 2010 Jan dewpoint_2m -14.4 2010-01-01
## 9 2010-01-01 01:00:00 1 2010 Jan precipitation 0 2010-01-01
## 10 2010-01-01 01:00:00 1 2010 Jan rain 0 2010-01-01
## # … with 707,606 more rows
# Limit to temperature, dewpoint, and precipitation
# Limit precipitation to only days with precipitation > 0
tmpDF <- dfHourlyTempPrecip %>%
filter(name %in% c("dewpoint_2m", "precipitation", "temperature_2m")) %>%
group_by(date, name) %>%
filter(name!="precipitation" | sum(value)>0) %>%
mutate(isMax=ifelse(value==max(value), 1, 0), isMin=ifelse(value==min(value), 1, 0)) %>%
group_by(name, month, hour) %>%
summarize(across(c(isMax, isMin), mean), .groups="drop") %>%
pivot_longer(-c(name, month, hour), names_to="metric")
tmpDF
## # A tibble: 1,728 × 5
## name month hour metric value
## <chr> <fct> <int> <chr> <dbl>
## 1 dewpoint_2m Jan 0 isMax 0.212
## 2 dewpoint_2m Jan 0 isMin 0.196
## 3 dewpoint_2m Jan 1 isMax 0.0507
## 4 dewpoint_2m Jan 1 isMin 0.0968
## 5 dewpoint_2m Jan 2 isMax 0.0599
## 6 dewpoint_2m Jan 2 isMin 0.0484
## 7 dewpoint_2m Jan 3 isMax 0.0346
## 8 dewpoint_2m Jan 3 isMin 0.0253
## 9 dewpoint_2m Jan 4 isMax 0.0184
## 10 dewpoint_2m Jan 4 isMin 0.0507
## # … with 1,718 more rows
# Plot max/min for metric
for(keyMetric in unique(tmpDF$name)) {
p1 <- tmpDF %>%
filter(name==keyMetric) %>%
ggplot(aes(x=hour, y=value)) +
geom_line(aes(color=metric, group=metric)) +
facet_wrap(~month) +
labs(x="Hour of day",
y="% of time as max/min",
title=paste0(keyMetric, ": maximum and minimum by hour"),
subtitle=paste0("Ties included as full value",
ifelse(keyMetric=="precipitation", " (days with no precipitation excluded)", "")
)
) +
scale_color_discrete("Metric:")
print(p1)
}
# Plot percent of hours with precipitation
dfHourlyTempPrecip %>%
filter(name=="precipitation") %>%
group_by(month, hour) %>%
summarize(pct0=mean(value>0), pct05=mean(value>=0.5), .groups="drop") %>%
pivot_longer(-c(month, hour)) %>%
mutate(name=ifelse(name=="pct0", ">=0.1 mm", ">=0.5 mm")) %>%
ggplot(aes(x=hour, y=value)) +
geom_line(aes(group=name, color=name)) +
facet_wrap(~month) +
labs(x="Hour of day",
y="% at/above precipitation hurdle",
title=paste0("% of observations with precipitation in past hour")
) +
lims(y=c(0, NA))
Wind by hour is explored:
# Hourly wind data
dfHourlyWind <- tmpOMHourly$tblHourly %>%
select(time, hour, windspeed_10m, windgusts_10m, winddirection_10m) %>%
mutate(year=lubridate::year(time),
month=factor(month.abb[lubridate::month(time)], levels=month.abb)
) %>%
pivot_longer(cols=-c(time, year, month, hour))
dfHourlyWind
## # A tibble: 353,808 × 6
## time hour year month name value
## <dttm> <int> <dbl> <fct> <chr> <dbl>
## 1 2010-01-01 00:00:00 0 2010 Jan windspeed_10m 18.7
## 2 2010-01-01 00:00:00 0 2010 Jan windgusts_10m 33.8
## 3 2010-01-01 00:00:00 0 2010 Jan winddirection_10m 298
## 4 2010-01-01 01:00:00 1 2010 Jan windspeed_10m 20.1
## 5 2010-01-01 01:00:00 1 2010 Jan windgusts_10m 32.4
## 6 2010-01-01 01:00:00 1 2010 Jan winddirection_10m 291
## 7 2010-01-01 02:00:00 2 2010 Jan windspeed_10m 19.9
## 8 2010-01-01 02:00:00 2 2010 Jan windgusts_10m 34.2
## 9 2010-01-01 02:00:00 2 2010 Jan winddirection_10m 290
## 10 2010-01-01 03:00:00 3 2010 Jan windspeed_10m 19.5
## # … with 353,798 more rows
# Graphs of wind speed/gust by month
for(metric in setdiff(unique(dfHourlyWind$name), "winddirection_10m")) {
p1 <- dfHourlyWind %>%
filter(name==metric, year<=2022) %>%
ggplot() +
geom_boxplot(aes(x=factor(hour), y=value), fill = "lightblue") +
facet_wrap(~month) +
labs(x=NULL,
y=NULL,
title=paste0(metric,
": hourly boxplot (",
tmpOMHourly$tblUnits %>% filter(name==metric) %>% pull(value),
") from 2010-2022"
)
)
print(p1)
}
# Average wind speed and gust by hour
dfHourlyWind %>%
filter(!(name %in% c("winddirection_10m")), year<=2022) %>%
group_by(month, hour, name) %>%
summarize(across("value", mean), .groups="drop") %>%
ggplot(aes(x=factor(hour), y=value)) +
geom_point(aes(group=name, color=name)) +
facet_wrap(~month) +
labs(title="Average wind speed and gust (km/h)", x=NULL, y="km/h") +
lims(y=c(0, NA))
# Average wind direction by hour
dfHourlyWind %>%
filter((name %in% c("winddirection_10m")), year<=2022) %>%
mutate(preDom=case_when(value<45|value>=315~"N",
value<135~"E",
value<225~"S",
value<315~"W",
TRUE~"error"
)
) %>%
count(month, hour, preDom) %>%
ggplot(aes(x=factor(hour), y=n)) +
geom_col(aes(fill=factor(preDom, levels=c("N", "W", "S", "E"))), position="fill") +
facet_wrap(~month) +
labs(title="Distribution of wind direction", x=NULL, y="Wind direction (%)") +
scale_fill_discrete("")
Wind by N/S and E/W is also explored:
# Average wind direction by hour
tmpWindDir <- dfHourlyWind %>%
filter((name %in% c("winddirection_10m")), year<=2022) %>%
mutate(ew=case_when(value>30&value<150~"E",
value>210&value<=330~"W",
TRUE~"none"
),
ns=case_when(value>300|value<=60~"N",
value>120&value<=240~"S",
TRUE~"none"
)
)
tmpWindDir
## # A tibble: 113,952 × 8
## time hour year month name value ew ns
## <dttm> <int> <dbl> <fct> <chr> <dbl> <chr> <chr>
## 1 2010-01-01 00:00:00 0 2010 Jan winddirection_10m 298 W none
## 2 2010-01-01 01:00:00 1 2010 Jan winddirection_10m 291 W none
## 3 2010-01-01 02:00:00 2 2010 Jan winddirection_10m 290 W none
## 4 2010-01-01 03:00:00 3 2010 Jan winddirection_10m 289 W none
## 5 2010-01-01 04:00:00 4 2010 Jan winddirection_10m 289 W none
## 6 2010-01-01 05:00:00 5 2010 Jan winddirection_10m 288 W none
## 7 2010-01-01 06:00:00 6 2010 Jan winddirection_10m 287 W none
## 8 2010-01-01 07:00:00 7 2010 Jan winddirection_10m 286 W none
## 9 2010-01-01 08:00:00 8 2010 Jan winddirection_10m 285 W none
## 10 2010-01-01 09:00:00 9 2010 Jan winddirection_10m 282 W none
## # … with 113,942 more rows
tmpWindDir %>%
count(month, hour, ew) %>%
group_by(month, hour) %>%
mutate(pct=n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=hour, y=pct)) +
geom_line(aes(color=factor(ew, levels=c("W", "none", "E")))) +
facet_wrap(~month) +
labs(title="Wind direction",
x="Hour of day",
y="% of observations",
subtitle="(030-150 deg defined as East, 210-330 deg defined as West)"
) +
scale_color_discrete("") +
lims(y=c(0, NA))
tmpWindDir %>%
count(month, hour, ns) %>%
group_by(month, hour) %>%
mutate(pct=n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=hour, y=pct)) +
geom_line(aes(color=factor(ns, levels=c("S", "none", "N")))) +
facet_wrap(~month) +
labs(title="Wind direction",
x="Hour of day",
y="% of observations",
subtitle="(300-060 deg defined as North, 120-240 deg defined as South)"
) +
scale_color_discrete("") +
lims(y=c(0, NA))
Hourly wind directions are averaged using arctan. The formula is arctan2(y=sum-of-sin, x=sum-of-cos):
# Unweighted by wind speed
tmpWindatan_uw <- tmpWindDir %>%
mutate(date=lubridate::date(time), cosine=cos(2*pi*value/360), sine=sin(2*pi*value/360)) %>%
group_by(date) %>%
summarize(across(c(cosine, sine), sum), .groups="drop") %>%
mutate(arctangent=atan(sine/cosine),
arctangent2=atan2(y=sine, x=cosine),
avgdir=((arctangent2/2)*(360/pi)),
avgdir=ifelse(avgdir<0, 360+avgdir, avgdir)
) %>%
left_join(select(tmpOMDaily$tblDaily, date, wdd=winddirection_10m_dominant), by="date")
tmpWindatan_uw
## # A tibble: 4,748 × 7
## date cosine sine arctangent arctangent2 avgdir wdd
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 2010-01-01 8.00 -22.2 -1.22 -1.22 290. 291
## 2 2010-01-02 14.8 -18.2 -0.888 -0.888 309. 309
## 3 2010-01-03 16.2 -17.3 -0.817 -0.817 313. 313
## 4 2010-01-04 13.4 -19.8 -0.976 -0.976 304. 304
## 5 2010-01-05 10.9 -21.3 -1.10 -1.10 297. 298
## 6 2010-01-06 1.50 -22.4 -1.50 -1.50 274. 280
## 7 2010-01-07 -9.62 -6.35 0.583 -2.56 213. 240
## 8 2010-01-08 19.6 -11.1 -0.516 -0.516 330. 334
## 9 2010-01-09 13.5 -19.3 -0.962 -0.962 305. 305
## 10 2010-01-10 -13.7 -17.3 0.900 -2.24 232. 226
## # … with 4,738 more rows
tmpWindatan_uw %>%
mutate(delta=avgdir-wdd) %>%
summary()
## date cosine sine arctangent
## Min. :2010-01-01 Min. :-23.894 Min. :-23.930 Min. :-1.5699
## 1st Qu.:2013-04-01 1st Qu.:-14.778 1st Qu.:-14.142 1st Qu.:-0.5057
## Median :2016-07-01 Median : -2.812 Median : -3.161 Median : 0.2924
## Mean :2016-07-01 Mean : -1.955 Mean : -2.518 Mean : 0.1595
## 3rd Qu.:2019-10-01 3rd Qu.: 10.799 3rd Qu.: 8.453 3rd Qu.: 0.8423
## Max. :2022-12-31 Max. : 23.900 Max. : 23.627 Max. : 1.5704
## arctangent2 avgdir wdd delta
## Min. :-3.1408 Min. : 0.1209 Min. : 0.0 Min. :-356.6866
## 1st Qu.:-2.1208 1st Qu.: 92.7324 1st Qu.: 93.0 1st Qu.: -2.5483
## Median :-0.6638 Median :196.5612 Median :198.5 Median : -0.0048
## Mean :-0.4307 Mean :180.2702 Mean :181.2 Mean : -0.9128
## 3rd Qu.: 0.9929 3rd Qu.:255.9577 3rd Qu.:257.0 3rd Qu.: 2.5971
## Max. : 3.1409 Max. :359.9807 Max. :360.0 Max. : 358.5385
tmpWindatan_uw %>%
count(wdd, awdd=round(avgdir)) %>%
ggplot(aes(x=wdd, y=awdd)) +
geom_point(aes(size=n)) +
labs(x="Reported dominant wind direction (daily data)",
y="Calculated dominant wind direction (hourly data)",
title="Relationship between reported and calculated dominant wind direction",
subtitle="Unweighted by wind speed"
) +
scale_size_continuous("# days")
# Weighted by wind speed
tmpWindatan_wtd <- tmpOMHourly$tblHourly %>%
select(date, time, wd=winddirection_10m, ws=windspeed_10m) %>%
mutate(cosine=cos(2*pi*wd/360), sine=sin(2*pi*wd/360)) %>%
group_by(date) %>%
summarize(across(c(cosine, sine), .fns=function(x) sum(x*ws)/sum(ws)), sws=sum(ws), .groups="drop") %>%
mutate(arctangent=atan(sine/cosine),
arctangent2=atan2(y=sine, x=cosine),
avgdir=((arctangent2/2)*(360/pi)),
avgdir=ifelse(avgdir<0, 360+avgdir, avgdir),
avgspd=sws/24,
dist=sws*sqrt(cosine**2+sine**2)
) %>%
left_join(select(tmpOMDaily$tblDaily, date, wdd=winddirection_10m_dominant), by="date")
tmpWindatan_wtd
## # A tibble: 4,914 × 10
## date cosine sine sws arctangent arctang…¹ avgdir avgspd dist wdd
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 2010-01-01 0.345 -0.917 463 -1.21 -1.21 291. 19.3 454. 291
## 2 2010-01-02 0.620 -0.757 478. -0.885 -0.885 309. 19.9 468. 309
## 3 2010-01-03 0.675 -0.722 447. -0.819 -0.819 313. 18.6 441. 313
## 4 2010-01-04 0.561 -0.823 461. -0.973 -0.973 304. 19.2 459. 304
## 5 2010-01-05 0.461 -0.884 392. -1.09 -1.09 298. 16.3 391. 298
## 6 2010-01-06 0.169 -0.945 251. -1.39 -1.39 280. 10.5 241. 280
## 7 2010-01-07 -0.250 -0.426 210 1.04 -2.10 240. 8.75 104. 240
## 8 2010-01-08 0.857 -0.418 471. -0.453 -0.453 334. 19.6 449. 334
## 9 2010-01-09 0.569 -0.798 359. -0.951 -0.951 306. 14.9 351. 305
## 10 2010-01-10 -0.643 -0.672 457. 0.808 -2.33 226. 19.0 425. 226
## # … with 4,904 more rows, and abbreviated variable name ¹arctangent2
tmpWindatan_wtd %>%
mutate(delta=avgdir-wdd) %>%
summary()
## date cosine sine sws
## Min. :2010-01-01 Min. :-0.99564 Min. :-0.9971 Min. : 85.3
## 1st Qu.:2013-05-13 1st Qu.:-0.64915 1st Qu.:-0.6081 1st Qu.:247.4
## Median :2016-09-22 Median :-0.11700 Median :-0.1535 Median :336.6
## Mean :2016-09-22 Mean :-0.07922 Mean :-0.1086 Mean :354.5
## 3rd Qu.:2020-02-02 3rd Qu.: 0.47777 3rd Qu.: 0.3654 3rd Qu.:436.6
## Max. :2023-06-15 Max. : 0.99606 Max. : 0.9855 Max. :933.4
## arctangent arctangent2 avgdir avgspd
## Min. :-1.5707 Min. :-3.1403 Min. : 0.015 Min. : 3.554
## 1st Qu.:-0.4878 1st Qu.:-2.1529 1st Qu.: 91.028 1st Qu.:10.309
## Median : 0.3097 Median :-0.7181 Median :198.011 Median :14.023
## Mean : 0.1642 Mean :-0.4630 Mean :180.505 Mean :14.770
## 3rd Qu.: 0.8461 3rd Qu.: 0.9509 3rd Qu.:257.184 3rd Qu.:18.191
## Max. : 1.5705 Max. : 3.1402 Max. :359.957 Max. :38.892
## dist wdd delta
## Min. : 3.001 Min. : 0.0 Min. :-359.9850
## 1st Qu.:186.211 1st Qu.: 91.0 1st Qu.: -0.2522
## Median :285.688 Median :198.0 Median : 0.0037
## Mean :301.524 Mean :180.7 Mean : -0.1459
## 3rd Qu.:399.462 3rd Qu.:257.0 3rd Qu.: 0.2536
## Max. :920.029 Max. :360.0 Max. : 1.5144
tmpWindatan_wtd %>%
count(wdd, awdd=round(avgdir)) %>%
ggplot(aes(x=wdd, y=awdd)) +
geom_point(aes(size=n)) +
labs(x="Reported dominant wind direction (daily data)",
y="Calculated dominant wind direction (hourly data)",
title="Relationship between reported and calculated dominant wind direction",
subtitle="Weighted by wind speed"
) +
scale_size_continuous("# days")
tmpWindatan_wtd %>%
count(rdist=round(dist), rspd=round(avgspd)) %>%
ggplot(aes(x=rspd, y=rdist/24)) +
geom_point(aes(size=n)) +
labs(title="Average wind speed (total and weighted by direction) by day",
x="Average wind speed per day",
y="Weighted average wind speed\n(total distance on average angle, divided by 24)"
) +
geom_abline(slope=1, intercept=0, lty=2) +
scale_size_continuous("# days")
tmpWindatan_wtd %>%
mutate(rdist=round(dist), rspd=round(avgspd)) %>%
ggplot(aes(x=rspd)) +
geom_boxplot(aes(y=dist/24/rspd, group=rspd), fill="lightblue") +
labs(title="Average wind speed (total and weighted by direction) by day",
x="Average wind speed per day",
y="Average weighted wind speed\n(as ratio of gross average)"
)
tmpWindatan_wtd %>%
filter(abs(avgdir-wdd)>1)
## # A tibble: 7 × 10
## date cosine sine sws arctangent arctang…¹ avgdir avgspd dist
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2011-12-02 0.174 0.0000455 288. 0.000262 0.000262 1.50e-2 12.0 50.0
## 2 2013-07-31 0.0206 0.0414 182. 1.11 1.11 6.35e+1 7.58 8.41
## 3 2016-04-22 0.918 0.00197 406. 0.00215 0.00215 1.23e-1 16.9 373.
## 4 2017-07-31 -0.0306 -0.0569 86.6 1.08 -2.06 2.42e+2 3.61 5.59
## 5 2019-07-17 0.0352 0.0545 139. 0.998 0.998 5.72e+1 5.80 9.03
## 6 2020-11-22 -0.108 0.0523 228. -0.452 2.69 1.54e+2 9.48 27.3
## 7 2023-03-01 0.0482 0.0180 251. 0.358 0.358 2.05e+1 10.5 12.9
## # … with 1 more variable: wdd <int>, and abbreviated variable name ¹arctangent2
The wind direction averaging of hourly data, weighted by wind speed, is consistent with the reported dominant wind direction in the daily data.
Weather codes are explored:
# Hourly weather codes, evapotranspiration, and shortwave
dfHourlyCode <- tmpOMHourly$tblHourly %>%
select(time, hour, wc=weathercode, et=et0_fao_evapotranspiration, sw=shortwave_radiation) %>%
mutate(year=lubridate::year(time),
month=factor(month.abb[lubridate::month(time)], levels=month.abb)
) %>%
pivot_longer(cols=-c(time, year, month, hour))
dfHourlyCode
## # A tibble: 353,808 × 6
## time hour year month name value
## <dttm> <int> <dbl> <fct> <chr> <dbl>
## 1 2010-01-01 00:00:00 0 2010 Jan wc 2
## 2 2010-01-01 00:00:00 0 2010 Jan et 0.02
## 3 2010-01-01 00:00:00 0 2010 Jan sw 0
## 4 2010-01-01 01:00:00 1 2010 Jan wc 1
## 5 2010-01-01 01:00:00 1 2010 Jan et 0.01
## 6 2010-01-01 01:00:00 1 2010 Jan sw 0
## 7 2010-01-01 02:00:00 2 2010 Jan wc 0
## 8 2010-01-01 02:00:00 2 2010 Jan et 0.01
## 9 2010-01-01 02:00:00 2 2010 Jan sw 0
## 10 2010-01-01 03:00:00 3 2010 Jan wc 0
## # … with 353,798 more rows
# Exploration of weather codes overall
dfHourlyCode %>%
filter(name=="wc", year<=2022) %>%
count(value) %>%
ggplot(aes(x=fct_rev(factor(value)), y=n/1000)) +
geom_col(fill="lightblue") +
geom_text(aes(label=round(n/1000, 1)), hjust=0, size=3) +
labs(title="Weather codes in hourly data (2010-2022)", y="Count (000)", x="Weather Code") +
coord_flip()
# Exploration of weather codes by month
dfHourlyCode %>%
filter(name=="wc", year<=2022) %>%
count(month, value) %>%
group_by(month) %>%
mutate(pct=n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=fct_rev(factor(value)), y=pct)) +
geom_col(fill="lightblue") +
geom_text(aes(label=paste0(round(100*pct, 1), "%")), hjust=0, size=3) +
labs(title="Weather codes in hourly data (2010-2022)", y="Frequency", x="Weather Code") +
coord_flip() +
facet_wrap(~month)
# Exploration of weather codes by month
dfHourlyCode %>%
filter(name=="wc", year<=2022) %>%
mutate(wType=case_when(value<=3~"Dry",
value>=51 & value<=55~"Drizzle",
value>=61 & value<=65~"Rain",
value>=71 & value<=75~"Snow",
TRUE~"error"
)
) %>%
count(month, wType) %>%
group_by(month) %>%
mutate(pct=n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=month, y=pct)) +
geom_col(aes(fill=factor(wType, levels=c("Dry", "Drizzle", "Rain", "Snow"))), position="stack") +
labs(title="Precipitation types in hourly data (2010-2022)", y="Frequency", x=NULL) +
coord_flip() +
scale_fill_discrete("")
# Weather codes from WMO Code Table 4677 (select examples)
# 00 Cloud development not observed or not observable
# 01 Clouds generally dissolving or becoming less developed
# 02 State of sky on the whole unchanged
# 03 Clouds generally forming or developing
# 51 Drizzle, not freezing, continuous (slight)
# 53 Drizzle, not freezing, continuous (moderate)
# 55 Drizzle, not freezing, continuous (heavy)
# 61 Rain, not freezing, continuous (slight)
# 63 Rain, not freezing, continuous (moderate)
# 65 Rain, not freezing, continuous (heavy)
# 71 Continuous fall of snowflakes (slight)
# 73 Continuous fall of snowflakes (moderate)
# 75 Continuous fall of snowflakes (heavy)